diff --git a/PACK/enchant/__init__.py b/PACK/enchant/__init__.py deleted file mode 100644 index 46f7487..0000000 --- a/PACK/enchant/__init__.py +++ /dev/null @@ -1,907 +0,0 @@ -# pyenchant -# -# Copyright (C) 2004-2011, Ryan Kelly -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., 59 Temple Place - Suite 330, -# Boston, MA 02111-1307, USA. -# -# In addition, as a special exception, you are -# given permission to link the code of this program with -# non-LGPL Spelling Provider libraries (eg: a MSFT Office -# spell checker backend) and distribute linked combinations including -# the two. You must obey the GNU Lesser General Public License in all -# respects for all of the code used other than said providers. If you modify -# this file, you may extend this exception to your version of the -# file, but you are not obligated to do so. If you do not wish to -# do so, delete this exception statement from your version. -# -""" -enchant: Access to the enchant spellchecking library -===================================================== - -This module provides several classes for performing spell checking -via the Enchant spellchecking library. For more details on Enchant, -visit the project website: - - http://www.abisource.com/enchant/ - -Spellchecking is performed using 'Dict' objects, which represent -a language dictionary. Their use is best demonstrated by a quick -example:: - - >>> import enchant - >>> d = enchant.Dict("en_US") # create dictionary for US English - >>> d.check("enchant") - True - >>> d.check("enchnt") - False - >>> d.suggest("enchnt") - ['enchant', 'enchants', 'enchanter', 'penchant', 'incant', 'enchain', 'enchanted'] - -Languages are identified by standard string tags such as "en" (English) -and "fr" (French). Specific language dialects can be specified by -including an additional code - for example, "en_AU" refers to Australian -English. The latter form is preferred as it is more widely supported. - -To check whether a dictionary exists for a given language, the function -'dict_exists' is available. Dictionaries may also be created using the -function 'request_dict'. - -A finer degree of control over the dictionaries and how they are created -can be obtained using one or more 'Broker' objects. These objects are -responsible for locating dictionaries for a specific language. - -In Python 2.x, unicode strings are supported transparently in the -standard manner - if a unicode string is given as an argument, the -result will be a unicode string. Note that Enchant works in UTF-8 -internally, so passing an ASCII string to a dictionary for a language -requiring Unicode may result in UTF-8 strings being returned. - -In Python 3.x unicode strings are expected throughout. Bytestrings -should not be passed into any functions. - -Errors that occur in this module are reported by raising subclasses -of 'Error'.
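The module-level convenience helpers mentioned above (defined near the end of this file) cover the common cases without an explicit Broker; a minimal sketch using the public pyenchant API::

    import enchant

    # Probe for a dictionary before requesting it.
    if enchant.dict_exists("en_US"):
        d = enchant.request_dict("en_US")  # equivalent to enchant.Dict("en_US")
        assert d.check("hello")
        print(d.suggest("helo"))

    # Language tags for every installed dictionary.
    print(enchant.list_languages())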
- -""" -_DOC_ERRORS = ['enchnt', 'enchnt', 'fr'] - -# Make version info available -__ver_major__ = 1 -__ver_minor__ = 6 -__ver_patch__ = 6 -__ver_sub__ = "" -__version__ = "%d.%d.%d%s" % (__ver_major__, __ver_minor__, - __ver_patch__, __ver_sub__) - -import os - -try: - from enchant import _enchant as _e -except ImportError: - if not os.environ.get("PYENCHANT_IGNORE_MISSING_LIB", False): - raise - _e = None - -from enchant.errors import * -from enchant.utils import EnchantStr, get_default_language -from enchant.pypwl import PyPWL - -# Due to the unfortunate name collision between the enchant "tokenize" module -# and the stdlib "tokenize" module, certain values of sys.path can cause -# the former to override the latter and break the "warnings" module. -# This hacks around it by making a dumming "warnings" module. -try: - import warnings -except ImportError: - class warnings(object): - def warn(self, *args, **kwds): - pass - - - warnings = warnings() - - -class ProviderDesc(object): - """Simple class describing an Enchant provider. - - Each provider has the following information associated with it: - - * name: Internal provider name (e.g. "aspell") - * desc: Human-readable description (e.g. "Aspell Provider") - * file: Location of the library containing the provider - - """ - _DOC_ERRORS = ["desc"] - - def __init__(self, name, desc, file): - self.name = name - self.desc = desc - self.file = file - - def __str__(self): - return "" % self.desc - - def __repr__(self): - return str(self) - - def __eq__(self, pd): - """Equality operator on ProviderDesc objects.""" - return (self.name == pd.name and \ - self.desc == pd.desc and \ - self.file == pd.file) - - def __hash__(self): - """Hash operator on ProviderDesc objects.""" - return hash(self.name + self.desc + self.file) - - -class _EnchantObject(object): - """Base class for enchant objects. - - This class implements some general functionality for interfacing with - the '_enchant' C-library in a consistent way. All public objects - from the 'enchant' module are subclasses of this class. - - All enchant objects have an attribute '_this' which contains the - pointer to the underlying C-library object. The method '_check_this' - can be called to ensure that this point is not None, raising an - exception if it is. - """ - - def __init__(self): - """_EnchantObject constructor.""" - self._this = None - # To be importable when enchant C lib is missing, we need - # to create a dummy default broker. - if _e is not None: - self._init_this() - - def _check_this(self, msg=None): - """Check that self._this is set to a pointer, rather than None.""" - if msg is None: - msg = "%s unusable: the underlying C-library object has been freed." - msg = msg % (self.__class__.__name__,) - if self._this is None: - raise Error(msg) - - def _init_this(self): - """Initialise the underlying C-library object pointer.""" - raise NotImplementedError - - def _raise_error(self, default="Unspecified Error", eclass=Error): - """Raise an exception based on available error messages. - - This method causes an Error to be raised. Subclasses should - override it to retrieve an error indication from the underlying - API if possible. If such a message cannot be retrieved, the - argument value is used. The class of the exception - can be specified using the argument - """ - raise eclass(default) - - _raise_error._DOC_ERRORS = ["eclass"] - - def __getstate__(self): - """Customize pickling of PyEnchant objects. 
- - Since it's not safe for multiple objects to share the same C-library - object, we make sure it's unset when pickling. - """ - state = self.__dict__.copy() - state["_this"] = None - return state - - def __setstate__(self, state): - self.__dict__.update(state) - self._init_this() - - -class Broker(_EnchantObject): - """Broker object for the Enchant spellchecker. - - Broker objects are responsible for locating and managing dictionaries. - Unless custom functionality is required, there is no need to use Broker - objects directly. The 'enchant' module provides a default broker object - so that 'Dict' objects can be created directly. - - The most important methods of this class include: - - * dict_exists: check existence of a specific language dictionary - * request_dict: obtain a dictionary for a specific language - * set_ordering: specify which dictionaries to try for a - given language. - - """ - - def __init__(self): - """Broker object constructor. - - This method is the constructor for the 'Broker' object. No - arguments are required. - """ - _EnchantObject.__init__(self) - - def _init_this(self): - self._this = _e.broker_init() - if not self._this: - raise Error("Could not initialise an enchant broker.") - - def __del__(self): - """Broker object destructor.""" - if _e is not None: - self._free() - - def _raise_error(self, default="Unspecified Error", eclass=Error): - """Overrides _EnchantObject._raise_error to check broker errors.""" - err = _e.broker_get_error(self._this) - if err == "" or err is None: - raise eclass(default) - raise eclass(err) - - def _free(self): - """Free system resource associated with a Broker object. - - This method can be called to free the underlying system resources - associated with a Broker object. It is called automatically when - the object is garbage collected. If called explicitly, the - Broker and any associated Dict objects must no longer be used. - """ - if self._this is not None: - _e.broker_free(self._this) - self._this = None - - def request_dict(self, tag=None): - """Request a Dict object for the language specified by 'tag'. - - This method constructs and returns a Dict object for the - requested language. 'tag' should be a string of the appropriate - form for specifying a language, such as "fr" (French) or "en_AU" - (Australian English). The existence of a specific language can - be tested using the 'dict_exists' method. - - If 'tag' is not given or is None, an attempt is made to determine - the current language in use. If this cannot be determined, Error - is raised. - - NOTE: this method is functionally equivalent to calling the Dict() - constructor and passing in the 'tag' argument. - - """ - return Dict(tag, self) - - request_dict._DOC_ERRORS = ["fr"] - - def _request_dict_data(self, tag): - """Request raw C pointer data for a dictionary. - - This method call passes on the call to the C library, and does - some internal bookkeeping. - """ - self._check_this() - tag = EnchantStr(tag) - new_dict = _e.broker_request_dict(self._this, tag.encode()) - if new_dict is None: - eStr = "Dictionary for language '%s' could not be found" - self._raise_error(eStr % (tag,), DictNotFoundError) - return new_dict - - def request_pwl_dict(self, pwl): - """Request a Dict object for a personal word list. - - This method behaves as 'request_dict' but rather than returning - a dictionary for a specific language, it returns a dictionary - referencing a personal word list. A personal word list is a file - of custom dictionary entries, one word per line.
- """ - self._check_this() - pwl = EnchantStr(pwl) - new_dict = _e.broker_request_pwl_dict(self._this, pwl.encode()) - if new_dict is None: - eStr = "Personal Word List file '%s' could not be loaded" - self._raise_error(eStr % (pwl,)) - d = Dict(False) - d._switch_this(new_dict, self) - return d - - def _free_dict(self, dict): - """Free memory associated with a dictionary. - - This method frees system resources associated with a Dict object. - It is equivalent to calling the object's 'free' method. Once this - method has been called on a dictionary, it must not be used again. - """ - self._check_this() - _e.broker_free_dict(self._this, dict._this) - dict._this = None - dict._broker = None - - def dict_exists(self, tag): - """Check availability of a dictionary. - - This method checks whether there is a dictionary available for - the language specified by 'tag'. It returns True if a dictionary - is available, and False otherwise. - """ - self._check_this() - tag = EnchantStr(tag) - val = _e.broker_dict_exists(self._this, tag.encode()) - return bool(val) - - def set_ordering(self, tag, ordering): - """Set dictionary preferences for a language. - - The Enchant library supports the use of multiple dictionary programs - and multiple languages. This method specifies which dictionaries - the broker should prefer when dealing with a given language. 'tag' - must be an appropriate language specification and 'ordering' is a - string listing the dictionaries in order of preference. For example - a valid ordering might be "aspell,myspell,ispell". - The value of 'tag' can also be set to "*" to set a default ordering - for all languages for which one has not been set explicitly. - """ - self._check_this() - tag = EnchantStr(tag) - ordering = EnchantStr(ordering) - _e.broker_set_ordering(self._this, tag.encode(), ordering.encode()) - - def describe(self): - """Return list of provider descriptions. - - This method returns a list of descriptions of each of the - dictionary providers available. Each entry in the list is a - ProviderDesc object. - """ - self._check_this() - self.__describe_result = [] - _e.broker_describe(self._this, self.__describe_callback) - return [ProviderDesc(*r) for r in self.__describe_result] - - def __describe_callback(self, name, desc, file): - """Collector callback for dictionary description. - - This method is used as a callback into the _enchant function - 'enchant_broker_describe'. It collects the given arguments in - a tuple and appends them to the list '__describe_result'. - """ - s = EnchantStr("") - name = s.decode(name) - desc = s.decode(desc) - file = s.decode(file) - self.__describe_result.append((name, desc, file)) - - def list_dicts(self): - """Return list of available dictionaries. - - This method returns a list of dictionaries available to the - broker. Each entry in the list is a two-tuple of the form: - - (tag,provider) - - where is the language lag for the dictionary and - is a ProviderDesc object describing the provider - through which that dictionary can be obtained. - """ - self._check_this() - self.__list_dicts_result = [] - _e.broker_list_dicts(self._this, self.__list_dicts_callback) - return [(r[0], ProviderDesc(*r[1])) for r in self.__list_dicts_result] - - def __list_dicts_callback(self, tag, name, desc, file): - """Collector callback for listing dictionaries. - - This method is used as a callback into the _enchant function - 'enchant_broker_list_dicts'. It collects the given arguments into - an appropriate tuple and appends them to '__list_dicts_result'. 
- """ - s = EnchantStr("") - tag = s.decode(tag) - name = s.decode(name) - desc = s.decode(desc) - file = s.decode(file) - self.__list_dicts_result.append((tag, (name, desc, file))) - - def list_languages(self): - """List languages for which dictionaries are available. - - This function returns a list of language tags for which a - dictionary is available. - """ - langs = [] - for (tag, prov) in self.list_dicts(): - if tag not in langs: - langs.append(tag) - return langs - - def __describe_dict(self, dict_data): - """Get the description tuple for a dict data object. - must be a C-library pointer to an enchant dictionary. - The return value is a tuple of the form: - (,,,) - """ - # Define local callback function - cb_result = [] - - def cb_func(tag, name, desc, file): - s = EnchantStr("") - tag = s.decode(tag) - name = s.decode(name) - desc = s.decode(desc) - file = s.decode(file) - cb_result.append((tag, name, desc, file)) - - # Actually call the describer function - _e.dict_describe(dict_data, cb_func) - return cb_result[0] - - __describe_dict._DOC_ERRORS = ["desc"] - - def get_param(self, name): - """Get the value of a named parameter on this broker. - - Parameters are used to provide runtime information to individual - provider backends. See the method 'set_param' for more details. - """ - name = EnchantStr(name) - return name.decode(_e.broker_get_param(self._this, name.encode())) - - get_param._DOC_ERRORS = ["param"] - - def set_param(self, name, value): - """Set the value of a named parameter on this broker. - - Parameters are used to provide runtime information to individual - provider backends. For example, the myspell provider will search - any directories given in the "enchant.myspell.dictionary.path" - parameter when looking for its dictionary files. - """ - name = EnchantStr(name) - value = EnchantStr(value) - _e.broker_set_param(self._this, name.encode(), value.encode()) - - -class Dict(_EnchantObject): - """Dictionary object for the Enchant spellchecker. - - Dictionary objects are responsible for checking the spelling of words - and suggesting possible corrections. Each dictionary is owned by a - Broker object, but unless a new Broker has explicitly been created - then this will be the 'enchant' module default Broker and is of little - interest. - - The important methods of this class include: - - * check(): check whether a word id spelled correctly - * suggest(): suggest correct spellings for a word - * add(): add a word to the user's personal dictionary - * remove(): add a word to the user's personal exclude list - * add_to_session(): add a word to the current spellcheck session - * store_replacement(): indicate a replacement for a given word - - Information about the dictionary is available using the following - attributes: - - * tag: the language tag of the dictionary - * provider: a ProviderDesc object for the dictionary provider - - """ - - def __init__(self, tag=None, broker=None): - """Dict object constructor. - - A dictionary belongs to a specific language, identified by the - string . If the tag is not given or is None, an attempt to - determine the language currently in use is made using the 'locale' - module. If the current language cannot be determined, Error is raised. - - If is instead given the value of False, a 'dead' Dict object - is created without any reference to a language. This is typically - only useful within PyEnchant itself. Any other non-string value - for raises Error. 
- - Each dictionary must also have an associated Broker object which - obtains the dictionary information from the underlying system. This - may be specified using 'broker'. If not given, the default broker - is used. - """ - # Initialise misc object attributes to None - self.provider = None - # If no tag was given, use the default language - if tag is None: - tag = get_default_language() - if tag is None: - err = "No tag specified and default language could not " - err = err + "be determined." - raise Error(err) - self.tag = tag - # If no broker was given, use the default broker - if broker is None: - broker = _broker - self._broker = broker - # Now let the superclass initialise the C-library object - _EnchantObject.__init__(self) - - def _init_this(self): - # Create dead object if False was given. - # Otherwise, use the broker to get C-library pointer data. - self._this = None - if self.tag: - this = self._broker._request_dict_data(self.tag) - self._switch_this(this, self._broker) - - def __del__(self): - """Dict object destructor.""" - # Calling free() might fail if python is shutting down - try: - self._free() - except AttributeError: - pass - - def _switch_this(self, this, broker): - """Switch the underlying C-library pointer for this object. - - As all useful state for a Dict is stored by the underlying C-library - pointer, it is very convenient to allow this to be switched at - run-time. Pass a new dict data object into this method to effect - the necessary changes. The creating Broker object (at the Python - level) must also be provided. - - This should *never* *ever* be used by application code. It's - a convenience for developers only, replacing the clunkier extra - parameter to __init__ from earlier versions. - """ - # Free old dict data - Dict._free(self) - # Hook in the new stuff - self._this = this - self._broker = broker - # Update object properties - desc = self.__describe(check_this=False) - self.tag = desc[0] - self.provider = ProviderDesc(*desc[1:]) - - _switch_this._DOC_ERRORS = ["init"] - - def _check_this(self, msg=None): - """Extend _EnchantObject._check_this() to check Broker validity. - - It is possible for the managing Broker object to be freed without - freeing the Dict. Thus validity checking must take into account - self._broker._this as well as self._this. - """ - if self._broker is None or self._broker._this is None: - self._this = None - _EnchantObject._check_this(self, msg) - - def _raise_error(self, default="Unspecified Error", eclass=Error): - """Overrides _EnchantObject._raise_error to check dict errors.""" - err = _e.dict_get_error(self._this) - if err == "" or err is None: - raise eclass(default) - raise eclass(err) - - def _free(self): - """Free the system resources associated with a Dict object. - - This method frees underlying system resources for a Dict object. - Once it has been called, the Dict object must no longer be used. - It is called automatically when the object is garbage collected. - """ - if self._broker is not None and self._this is not None: - self._broker._free_dict(self) - - def check(self, word): - """Check spelling of a word. - - This method takes a word in the dictionary language and returns - True if it is correctly spelled, and False otherwise. - """ - self._check_this() - word = EnchantStr(word) - val = _e.dict_check(self._this, word.encode()) - if val == 0: - return True - if val > 0: - return False - self._raise_error() - - def suggest(self, word): - """Suggest possible spellings for a word.
- - This method tries to guess the correct spelling for a given - word, returning the possibilities in a list. - """ - self._check_this() - word = EnchantStr(word) - suggs = _e.dict_suggest(self._this, word.encode()) - return [word.decode(w) for w in suggs] - - def add(self, word): - """Add a word to the user's personal word list.""" - self._check_this() - word = EnchantStr(word) - _e.dict_add(self._this, word.encode()) - - def remove(self, word): - """Add a word to the user's personal exclude list.""" - self._check_this() - word = EnchantStr(word) - _e.dict_remove(self._this, word.encode()) - - def add_to_pwl(self, word): - """Add a word to the user's personal word list.""" - warnings.warn("Dict.add_to_pwl is deprecated, please use Dict.add", - category=DeprecationWarning, stacklevel=2) - self._check_this() - word = EnchantStr(word) - _e.dict_add_to_pwl(self._this, word.encode()) - - def add_to_session(self, word): - """Add a word to the session personal list.""" - self._check_this() - word = EnchantStr(word) - _e.dict_add_to_session(self._this, word.encode()) - - def remove_from_session(self, word): - """Add a word to the session exclude list.""" - self._check_this() - word = EnchantStr(word) - _e.dict_remove_from_session(self._this, word.encode()) - - def is_added(self, word): - """Check whether a word is in the personal word list.""" - self._check_this() - word = EnchantStr(word) - return _e.dict_is_added(self._this, word.encode()) - - def is_removed(self, word): - """Check whether a word is in the personal exclude list.""" - self._check_this() - word = EnchantStr(word) - return _e.dict_is_removed(self._this, word.encode()) - - def is_in_session(self, word): - """Check whether a word is in the session list.""" - warnings.warn("Dict.is_in_session is deprecated, " \ - "please use Dict.is_added", - category=DeprecationWarning, stacklevel=2) - self._check_this() - word = EnchantStr(word) - return _e.dict_is_in_session(self._this, word.encode()) - - def store_replacement(self, mis, cor): - """Store a replacement spelling for a misspelled word. - - This method makes a suggestion to the spellchecking engine that the - misspelled word 'mis' is in fact correctly spelled as 'cor'. Such - a suggestion will typically mean that 'cor' appears early in the - list of suggested spellings offered for later instances of 'mis'. - """ - if not mis: - raise ValueError("can't store replacement for an empty string") - if not cor: - raise ValueError("can't store empty string as a replacement") - self._check_this() - mis = EnchantStr(mis) - cor = EnchantStr(cor) - _e.dict_store_replacement(self._this, mis.encode(), cor.encode()) - - store_replacement._DOC_ERRORS = ["mis", "mis"] - - def __describe(self, check_this=True): - """Return a tuple describing the dictionary. - - This method returns a four-element tuple describing the underlying - spellchecker system providing the dictionary. It will contain the - following strings: - - * language tag - * name of dictionary provider - * description of dictionary provider - * dictionary file - - Direct use of this method is not recommended - instead, access this - information through the 'tag' and 'provider' attributes. - """ - if check_this: - self._check_this() - _e.dict_describe(self._this, self.__describe_callback) - return self.__describe_result - - def __describe_callback(self, tag, name, desc, file): - """Collector callback for dictionary description. - - This method is used as a callback into the _enchant function - 'enchant_dict_describe'.
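Taken together, the session and replacement methods above support a workflow like this sketch::

    import enchant

    d = enchant.Dict("en_US")
    d.add_to_session("pyenchant")      # accepted for this session only
    assert d.check("pyenchant")
    d.store_replacement("teh", "the")  # bias suggestions for "teh"
    print(d.suggest("teh"))            # "the" should now appear early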
It collects the given arguments in - a tuple and stores them in the attribute '__describe_result'. - """ - s = EnchantStr("") - tag = s.decode(tag) - name = s.decode(name) - desc = s.decode(desc) - file = s.decode(file) - self.__describe_result = (tag, name, desc, file) - - -class DictWithPWL(Dict): - """Dictionary with separately-managed personal word list. - - NOTE: As of version 1.4.0, enchant manages a per-user pwl and - exclude list. This class is now only needed if you want - to explicitly maintain a separate word list in addition to - the default one. - - This class behaves as the standard Dict class, but also manages a - personal word list stored in a separate file. The file must be - specified at creation time by the 'pwl' argument to the constructor. - Words added to the dictionary are automatically appended to the pwl file. - - A personal exclude list can also be managed, by passing another filename - to the constructor in the optional 'pel' argument. If this is not given, - requests to exclude words are ignored. - - If either 'pwl' or 'pel' are None, an in-memory word list is used. - This will prevent calls to add() and remove() from affecting the user's - default word lists. - - The Dict object managing the PWL is available as the 'pwl' attribute. - The Dict object managing the PEL is available as the 'pel' attribute. - - To create a DictWithPWL from the user's default language, use None - as the 'tag' argument. - """ - _DOC_ERRORS = ["pel", "pel", "PEL", "pel"] - - def __init__(self, tag, pwl=None, pel=None, broker=None): - """DictWithPWL constructor. - - The argument 'pwl', if not None, names a file containing the - personal word list. If this file does not exist, it is created - with default permissions. - - The argument 'pel', if not None, names a file containing the personal - exclude list. If this file does not exist, it is created with - default permissions. - """ - Dict.__init__(self, tag, broker) - if pwl is not None: - if not os.path.exists(pwl): - f = open(pwl, "wt") - f.close() - del f - self.pwl = self._broker.request_pwl_dict(pwl) - else: - self.pwl = PyPWL() - if pel is not None: - if not os.path.exists(pel): - f = open(pel, "wt") - f.close() - del f - self.pel = self._broker.request_pwl_dict(pel) - else: - self.pel = PyPWL() - - def _check_this(self, msg=None): - """Extend Dict._check_this() to check PWL validity.""" - if self.pwl is None: - self._free() - if self.pel is None: - self._free() - Dict._check_this(self, msg) - self.pwl._check_this(msg) - self.pel._check_this(msg) - - def _free(self): - """Extend Dict._free() to free the PWL as well.""" - if self.pwl is not None: - self.pwl._free() - self.pwl = None - if self.pel is not None: - self.pel._free() - self.pel = None - Dict._free(self) - - def check(self, word): - """Check spelling of a word. - - This method takes a word in the dictionary language and returns - True if it is correctly spelled, and False otherwise. It checks - both the dictionary and the personal word list. - """ - if self.pel.check(word): - return False - if self.pwl.check(word): - return True - if Dict.check(self, word): - return True - return False - - def suggest(self, word): - """Suggest possible spellings for a word. - - This method tries to guess the correct spelling for a given - word, returning the possibilities in a list.
- """ - suggs = Dict.suggest(self, word) - suggs.extend([w for w in self.pwl.suggest(word) if w not in suggs]) - for i in range(len(suggs) - 1, -1, -1): - if self.pel.check(suggs[i]): - del suggs[i] - return suggs - - def add(self, word): - """Add a word to the associated personal word list. - - This method adds the given word to the personal word list, and - automatically saves the list to disk. - """ - self._check_this() - self.pwl.add(word) - self.pel.remove(word) - - def remove(self, word): - """Add a word to the associated exclude list.""" - self._check_this() - self.pwl.remove(word) - self.pel.add(word) - - def add_to_pwl(self, word): - """Add a word to the associated personal word list. - - This method adds the given word to the personal word list, and - automatically saves the list to disk. - """ - self._check_this() - self.pwl.add_to_pwl(word) - self.pel.remove(word) - - def is_added(self, word): - """Check whether a word is in the personal word list.""" - self._check_this() - return self.pwl.is_added(word) - - def is_removed(self, word): - """Check whether a word is in the personal exclude list.""" - self._check_this() - return self.pel.is_added(word) - - -## Create a module-level default broker object, and make its important -## methods available at the module level. -_broker = Broker() -request_dict = _broker.request_dict -request_pwl_dict = _broker.request_pwl_dict -dict_exists = _broker.dict_exists -list_dicts = _broker.list_dicts -list_languages = _broker.list_languages -get_param = _broker.get_param -set_param = _broker.set_param - - -# Expose the "get_version" function. -def get_enchant_version(): - """Get the version string for the underlying enchant library.""" - return _e.get_version() - - -# Run unit tests when called from comand-line -if __name__ == "__main__": - import sys - import enchant.tests - - res = enchant.tests.runtestsuite() - if len(res.errors) > 0 or len(res.failures) > 0: - sys.exit(1) - sys.exit(0) diff --git a/PACK/enchant/_enchant.py b/PACK/enchant/_enchant.py deleted file mode 100644 index 3871bbd..0000000 --- a/PACK/enchant/_enchant.py +++ /dev/null @@ -1,366 +0,0 @@ -# pyenchant -# -# Copyright (C) 2004-2008, Ryan Kelly -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPsE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., 59 Temple Place - Suite 330, -# Boston, MA 02111-1307, USA. -# -# In addition, as a special exception, you are -# given permission to link the code of this program with -# non-LGPL Spelling Provider libraries (eg: a MSFT Office -# spell checker backend) and distribute linked combinations including -# the two. You must obey the GNU Lesser General Public License in all -# respects for all of the code used other than said providers. If you modify -# this file, you may extend this exception to your version of the -# file, but you are not obligated to do so. If you do not wish to -# do so, delete this exception statement from your version. 
-# -""" - - enchant._enchant: ctypes-based wrapper for enchant C library - - This module implements the low-level interface to the underlying - C library for enchant. The interface is based on ctypes and tries - to do as little as possible while making the higher-level components - easier to write. - - The following conveniences are provided that differ from the underlying - C API: - - * the "enchant" prefix has been removed from all functions, since - python has a proper module system - * callback functions do not take a user_data argument, since - python has proper closures that can manage this internally - * string lengths are not passed into functions such as dict_check, - since python strings know how long they are - -""" - -import sys, os, os.path -from ctypes import * -from ctypes.util import find_library - -from enchant import utils -from enchant.errors import * -from enchant.utils import unicode - -# Locate and load the enchant dll. -# We've got several options based on the host platform. - -e = None - - -def _e_path_possibilities(): - """Generator yielding possible locations of the enchant library.""" - yield os.environ.get("PYENCHANT_LIBRARY_PATH") - yield find_library("enchant") - yield find_library("libenchant") - yield find_library("libenchant-1") - if sys.platform == 'darwin': - # enchant lib installed by macports - yield "/opt/local/lib/libenchant.dylib" - - -# On win32 we ship a bundled version of the enchant DLLs. -# Use them if they're present. -if sys.platform == "win32": - e_path = None - try: - e_path = utils.get_resource_filename("libenchant.dll") - except (Error, ImportError): - try: - e_path = utils.get_resource_filename("libenchant-1.dll") - except (Error, ImportError): - pass - if e_path is not None: - # We need to use LoadLibraryEx with LOAD_WITH_ALTERED_SEARCH_PATH so - # that we don't accidentally suck in other versions of e.g. glib. - if not isinstance(e_path, unicode): - e_path = unicode(e_path, sys.getfilesystemencoding()) - LoadLibraryEx = windll.kernel32.LoadLibraryExW - LOAD_WITH_ALTERED_SEARCH_PATH = 0x00000008 - e_handle = LoadLibraryEx(e_path, None, LOAD_WITH_ALTERED_SEARCH_PATH) - if not e_handle: - raise WinError() - e = CDLL(e_path, handle=e_handle) - -# On darwin we ship a bundled version of the enchant DLLs. -# Use them if they're present. -if e is None and sys.platform == "darwin": - try: - e_path = utils.get_resource_filename("lib/libenchant.1.dylib") - except (Error, ImportError): - pass - else: - # Enchant doesn't natively support relocatable binaries on OSX. - # We fake it by patching the enchant source to expose a char**, which - # we can write the runtime path into ourelves. - e = CDLL(e_path) - try: - e_dir = os.path.dirname(os.path.dirname(e_path)) - prefix_dir = POINTER(c_char_p).in_dll(e, "enchant_prefix_dir_p") - prefix_dir.contents = c_char_p(e_dir) - except AttributeError: - e = None - -# Not found yet, search various standard system locations. -if e is None: - for e_path in _e_path_possibilities(): - if e_path is not None: - try: - e = cdll.LoadLibrary(e_path) - except OSError: - pass - else: - break - -# No usable enchant install was found :-( -if e is None: - raise ImportError("enchant C library not found") - - -# Define various callback function types - -def CALLBACK(restype, *argtypes): - """Factory for generating callback function prototypes. - - This is factored into a factory so I can easily change the definition - for experimentation or debugging. 
- """ - return CFUNCTYPE(restype, *argtypes) - - -t_broker_desc_func = CALLBACK(None, c_char_p, c_char_p, c_char_p, c_void_p) -t_dict_desc_func = CALLBACK(None, c_char_p, c_char_p, c_char_p, c_char_p, c_void_p) - -# Simple typedefs for readability - -t_broker = c_void_p -t_dict = c_void_p - -# Now we can define the types of each function we are going to use - -broker_init = e.enchant_broker_init -broker_init.argtypes = [] -broker_init.restype = t_broker - -broker_free = e.enchant_broker_free -broker_free.argtypes = [t_broker] -broker_free.restype = None - -broker_request_dict = e.enchant_broker_request_dict -broker_request_dict.argtypes = [t_broker, c_char_p] -broker_request_dict.restype = t_dict - -broker_request_pwl_dict = e.enchant_broker_request_pwl_dict -broker_request_pwl_dict.argtypes = [t_broker, c_char_p] -broker_request_pwl_dict.restype = t_dict - -broker_free_dict = e.enchant_broker_free_dict -broker_free_dict.argtypes = [t_broker, t_dict] -broker_free_dict.restype = None - -broker_dict_exists = e.enchant_broker_dict_exists -broker_dict_exists.argtypes = [t_broker, c_char_p] -broker_free_dict.restype = c_int - -broker_set_ordering = e.enchant_broker_set_ordering -broker_set_ordering.argtypes = [t_broker, c_char_p, c_char_p] -broker_set_ordering.restype = None - -broker_get_error = e.enchant_broker_get_error -broker_get_error.argtypes = [t_broker] -broker_get_error.restype = c_char_p - -broker_describe1 = e.enchant_broker_describe -broker_describe1.argtypes = [t_broker, t_broker_desc_func, c_void_p] -broker_describe1.restype = None - - -def broker_describe(broker, cbfunc): - def cbfunc1(*args): - cbfunc(*args[:-1]) - - broker_describe1(broker, t_broker_desc_func(cbfunc1), None) - - -broker_list_dicts1 = e.enchant_broker_list_dicts -broker_list_dicts1.argtypes = [t_broker, t_dict_desc_func, c_void_p] -broker_list_dicts1.restype = None - - -def broker_list_dicts(broker, cbfunc): - def cbfunc1(*args): - cbfunc(*args[:-1]) - - broker_list_dicts1(broker, t_dict_desc_func(cbfunc1), None) - - -try: - broker_get_param = e.enchant_broker_get_param -except AttributeError: - # Make the lookup error occur at runtime - def broker_get_param(broker, param_name): - return e.enchant_broker_get_param(param_name) -else: - broker_get_param.argtypes = [t_broker, c_char_p] - broker_get_param.restype = c_char_p - -try: - broker_set_param = e.enchant_broker_set_param -except AttributeError: - # Make the lookup error occur at runtime - def broker_set_param(broker, param_name): - return e.enchant_broker_set_param(param_name) -else: - broker_set_param.argtypes = [t_broker, c_char_p, c_char_p] - broker_set_param.restype = None - -try: - get_version = e.enchant_get_version -except AttributeError: - # Make the lookup error occur at runtime - def get_version(): - return e.enchant_get_version() -else: - get_version.argtypes = [] - get_version.restype = c_char_p - -dict_check1 = e.enchant_dict_check -dict_check1.argtypes = [t_dict, c_char_p, c_size_t] -dict_check1.restype = c_int - - -def dict_check(dict, word): - return dict_check1(dict, word, len(word)) - - -dict_suggest1 = e.enchant_dict_suggest -dict_suggest1.argtypes = [t_dict, c_char_p, c_size_t, POINTER(c_size_t)] -dict_suggest1.restype = POINTER(c_char_p) - - -def dict_suggest(dict, word): - numSuggsP = pointer(c_size_t(0)) - suggs_c = dict_suggest1(dict, word, len(word), numSuggsP) - suggs = [] - n = 0 - while n < numSuggsP.contents.value: - suggs.append(suggs_c[n]) - n = n + 1 - if numSuggsP.contents.value > 0: - dict_free_string_list(dict, suggs_c) - 
return suggs - - -dict_add1 = e.enchant_dict_add -dict_add1.argtypes = [t_dict, c_char_p, c_size_t] -dict_add1.restype = None - - -def dict_add(dict, word): - return dict_add1(dict, word, len(word)) - - -dict_add_to_pwl1 = e.enchant_dict_add -dict_add_to_pwl1.argtypes = [t_dict, c_char_p, c_size_t] -dict_add_to_pwl1.restype = None - - -def dict_add_to_pwl(dict, word): - return dict_add_to_pwl1(dict, word, len(word)) - - -dict_add_to_session1 = e.enchant_dict_add_to_session -dict_add_to_session1.argtypes = [t_dict, c_char_p, c_size_t] -dict_add_to_session1.restype = None - - -def dict_add_to_session(dict, word): - return dict_add_to_session1(dict, word, len(word)) - - -dict_remove1 = e.enchant_dict_remove -dict_remove1.argtypes = [t_dict, c_char_p, c_size_t] -dict_remove1.restype = None - - -def dict_remove(dict, word): - return dict_remove1(dict, word, len(word)) - - -dict_remove_from_session1 = e.enchant_dict_remove_from_session -dict_remove_from_session1.argtypes = [t_dict, c_char_p, c_size_t] -dict_remove_from_session1.restype = c_int - - -def dict_remove_from_session(dict, word): - return dict_remove_from_session1(dict, word, len(word)) - - -dict_is_added1 = e.enchant_dict_is_added -dict_is_added1.argtypes = [t_dict, c_char_p, c_size_t] -dict_is_added1.restype = c_int - - -def dict_is_added(dict, word): - return dict_is_added1(dict, word, len(word)) - - -dict_is_removed1 = e.enchant_dict_is_removed -dict_is_removed1.argtypes = [t_dict, c_char_p, c_size_t] -dict_is_removed1.restype = c_int - - -def dict_is_removed(dict, word): - return dict_is_removed1(dict, word, len(word)) - - -dict_is_in_session1 = e.enchant_dict_is_in_session -dict_is_in_session1.argtypes = [t_dict, c_char_p, c_size_t] -dict_is_in_session1.restype = c_int - - -def dict_is_in_session(dict, word): - return dict_is_in_session1(dict, word, len(word)) - - -dict_store_replacement1 = e.enchant_dict_store_replacement -dict_store_replacement1.argtypes = [t_dict, c_char_p, c_size_t, c_char_p, c_size_t] -dict_store_replacement1.restype = None - - -def dict_store_replacement(dict, mis, cor): - return dict_store_replacement1(dict, mis, len(mis), cor, len(cor)) - - -dict_free_string_list = e.enchant_dict_free_string_list -dict_free_string_list.argtypes = [t_dict, POINTER(c_char_p)] -dict_free_string_list.restype = None - -dict_get_error = e.enchant_dict_get_error -dict_get_error.argtypes = [t_dict] -dict_get_error.restype = c_char_p - -dict_describe1 = e.enchant_dict_describe -dict_describe1.argtypes = [t_dict, t_dict_desc_func, c_void_p] -dict_describe1.restype = None - - -def dict_describe(dict, cbfunc): - def cbfunc1(tag, name, desc, file, data): - cbfunc(tag, name, desc, file) - - dict_describe1(dict, t_dict_desc_func(cbfunc1), None) diff --git a/PACK/enchant/checker/CmdLineChecker.py b/PACK/enchant/checker/CmdLineChecker.py deleted file mode 100644 index fd3be0b..0000000 --- a/PACK/enchant/checker/CmdLineChecker.py +++ /dev/null @@ -1,203 +0,0 @@ -# pyenchant -# -# Copyright (C) 2004-2008, Ryan Kelly -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details.
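As an aside, the declare-then-wrap binding pattern used throughout _enchant.py generalizes to any C library; a self-contained sketch against libc (strlen is a real libc symbol, though find_library may return None on some platforms)::

    from ctypes import CDLL, c_char_p, c_size_t
    from ctypes.util import find_library

    libc = CDLL(find_library("c"))

    # Declare the prototype, exactly as _enchant.py does for each symbol.
    strlen = libc.strlen
    strlen.argtypes = [c_char_p]
    strlen.restype = c_size_t

    assert strlen(b"enchant") == 7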
-# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., 59 Temple Place - Suite 330, -# Boston, MA 02111-1307, USA. -# -# In addition, as a special exception, you are -# given permission to link the code of this program with -# non-LGPL Spelling Provider libraries (eg: a MSFT Office -# spell checker backend) and distribute linked combinations including -# the two. You must obey the GNU Lesser General Public License in all -# respects for all of the code used other than said providers. If you modify -# this file, you may extend this exception to your version of the -# file, but you are not obligated to do so. If you do not wish to -# do so, delete this exception statement from your version. -# -""" - - enchant.checker.CmdLineChecker: Command-Line spell checker - - This module provides the class CmdLineChecker, which interactively - spellchecks a piece of text by interacting with the user on the - command line. It can also be run as a script to spellcheck a file. - -""" - -import sys - -from enchant.checker import SpellChecker -from enchant.utils import printf - - -class CmdLineChecker: - """A simple command-line spell checker. - - This class implements a simple command-line spell checker. It must - be given a SpellChecker instance to operate on, and interacts with - the user by printing instructions on stdout and reading commands from - stdin. - """ - _DOC_ERRORS = ["stdout", "stdin"] - - def __init__(self): - self._stop = False - self._checker = None - - def set_checker(self, chkr): - self._checker = chkr - - def get_checker(self): - return self._checker - - def run(self): - """Run the spellchecking loop.""" - self._stop = False - for err in self._checker: - self.error = err - printf(["ERROR:", err.word]) - printf(["HOW ABOUT:", err.suggest()]) - status = self.read_command() - while not status and not self._stop: - status = self.read_command() - if self._stop: - break - printf(["DONE"]) - - def print_help(self): - printf(["0..N: replace with the numbered suggestion"]) - printf(["R0..RN: always replace with the numbered suggestion"]) - printf(["i: ignore this word"]) - printf(["I: always ignore this word"]) - printf(["a: add word to personal dictionary"]) - printf(["e: edit the word"]) - printf(["q: quit checking"]) - printf(["h: print this help message"]) - printf(["----------------------------------------------------"]) - printf(["HOW ABOUT:", self.error.suggest()]) - - def read_command(self): - cmd = raw_input(">> ") - cmd = cmd.strip() - - if cmd.isdigit(): - repl = int(cmd) - suggs = self.error.suggest() - if repl >= len(suggs): - printf(["No suggestion number", repl]) - return False - printf(["Replacing '%s' with '%s'" % (self.error.word, suggs[repl])]) - self.error.replace(suggs[repl]) - return True - - if cmd[0] == "R": - if not cmd[1:].isdigit(): - printf(["Badly formatted command (try 'help')"]) - return False - repl = int(cmd[1:]) - suggs = self.error.suggest() - if repl >= len(suggs): - printf(["No suggestion number", repl]) - return False - self.error.replace_always(suggs[repl]) - return True - - if cmd == "i": - return True - - if cmd == "I": - self.error.ignore_always() - return True - - if cmd == "a": - self.error.add() - return True - - if cmd == "e": - repl = raw_input("New Word: ") - self.error.replace(repl.strip()) - return True - - if cmd == "q": - self._stop = True - return True - - if "help".startswith(cmd.lower()): - self.print_help() - return False - -
printf(["Badly formatted command (try 'help')"]) - return False - - def run_on_file(self, infile, outfile=None, enc=None): - """Run spellchecking on the named file. - This method can be used to run the spellchecker over the named file. - If is not given, the corrected contents replace the contents - of . If is given, the corrected contents will be - written to that file. Use "-" to have the contents written to stdout. - If is given, it specifies the encoding used to read the - file's contents into a unicode string. The output will be written - in the same encoding. - """ - inStr = "".join(file(infile, "r").readlines()) - if enc is not None: - inStr = inStr.decode(enc) - self._checker.set_text(inStr) - self.run() - outStr = self._checker.get_text() - if enc is not None: - outStr = outStr.encode(enc) - if outfile is None: - outF = file(infile, "w") - elif outfile == "-": - outF = sys.stdout - else: - outF = file(outfile, "w") - outF.write(outStr) - outF.close() - - run_on_file._DOC_ERRORS = ["outfile", "infile", "outfile", "stdout"] - - -def _run_as_script(): - """Run the command-line spellchecker as a script. - This function allows the spellchecker to be invoked from the command-line - to check spelling in a file. - """ - # Check necessary command-line options - from optparse import OptionParser - op = OptionParser() - op.add_option("-o", "--output", dest="outfile", metavar="FILE", - help="write changes into FILE") - op.add_option("-l", "--lang", dest="lang", metavar="TAG", default="en_US", - help="use language idenfified by TAG") - op.add_option("-e", "--encoding", dest="enc", metavar="ENC", - help="file is unicode with encoding ENC") - (opts, args) = op.parse_args() - # Sanity check - if len(args) < 1: - raise ValueError("Must name a file to check") - if len(args) > 1: - raise ValueError("Can only check a single file") - # Create and run the checker - chkr = SpellChecker(opts.lang) - cmdln = CmdLineChecker() - cmdln.set_checker(chkr) - cmdln.run_on_file(args[0], opts.outfile, opts.enc) - - -if __name__ == "__main__": - _run_as_script() diff --git a/PACK/enchant/checker/GtkSpellCheckerDialog.py b/PACK/enchant/checker/GtkSpellCheckerDialog.py deleted file mode 100644 index 0d8f69a..0000000 --- a/PACK/enchant/checker/GtkSpellCheckerDialog.py +++ /dev/null @@ -1,304 +0,0 @@ -# GtkSpellCheckerDialog for pyenchant -# -# Copyright (C) 2004-2005, Fredrik Corneliusson -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., 59 Temple Place - Suite 330, -# Boston, MA 02111-1307, USA. -# -# In addition, as a special exception, you are -# given permission to link the code of this program with -# non-LGPL Spelling Provider libraries (eg: a MSFT Office -# spell checker backend) and distribute linked combinations including -# the two. You must obey the GNU Lesser General Public License in all -# respects for all of the code used other than said providers. 
If you modify -# this file, you may extend this exception to your version of the -# file, but you are not obligated to do so. If you do not wish to -# do so, delete this exception statement from your version. -# - -import gtk -import gobject - -from enchant.utils import printf, unicode - -# columns -COLUMN_SUGGESTION = 0 - - -def create_list_view(col_label): - # create list widget - list_ = gtk.ListStore(str) - list_view = gtk.TreeView(model=list_) - - list_view.set_rules_hint(True) - list_view.get_selection().set_mode(gtk.SELECTION_SINGLE) - # Add Columns - renderer = gtk.CellRendererText() - renderer.set_data("column", COLUMN_SUGGESTION) - column = gtk.TreeViewColumn(col_label, renderer, text=COLUMN_SUGGESTION) - list_view.append_column(column) - return list_view - - -class GtkSpellCheckerDialog(gtk.Window): - def __init__(self, *args, **kwargs): - gtk.Window.__init__(self, *args, **kwargs) - self.set_title('Spell check') - self.set_default_size(350, 200) - - self._checker = None - self._numContext = 40 - - self.errors = None - - # create accel group - accel_group = gtk.AccelGroup() - self.add_accel_group(accel_group) - - # list of widgets to disable if there's no spell error left - self._conditional_widgets = [] - conditional = self._conditional_widgets.append - - # layout - mainbox = gtk.VBox(spacing=5) - hbox = gtk.HBox(spacing=5) - self.add(mainbox) - mainbox.pack_start(hbox, padding=5) - - box1 = gtk.VBox(spacing=5) - hbox.pack_start(box1, padding=5) - conditional(box1) - - # unrecognized word - text_view_label = gtk.Label('Unrecognized word') - text_view_label.set_justify(gtk.JUSTIFY_LEFT) - box1.pack_start(text_view_label, False, False) - - text_view = gtk.TextView() - text_view.set_wrap_mode(gtk.WRAP_WORD) - text_view.set_editable(False) - text_view.set_cursor_visible(False) - self.error_text = text_view.get_buffer() - text_buffer = text_view.get_buffer() - text_buffer.create_tag("fg_black", foreground="black") - text_buffer.create_tag("fg_red", foreground="red") - - box1.pack_start(text_view) - - # Change to - change_to_box = gtk.HBox() - box1.pack_start(change_to_box, False, False) - - change_to_label = gtk.Label('Change to:') - self.replace_text = gtk.Entry() - text_view_label.set_justify(gtk.JUSTIFY_LEFT) - change_to_box.pack_start(change_to_label, False, False) - change_to_box.pack_start(self.replace_text) - - # scrolled window - sw = gtk.ScrolledWindow() - sw.set_shadow_type(gtk.SHADOW_ETCHED_IN) - sw.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC) - box1.pack_start(sw) - - self.suggestion_list_view = create_list_view('Suggestions') - self.suggestion_list_view.connect("button_press_event", self._onButtonPress) - self.suggestion_list_view.connect("cursor-changed", self._onSuggestionChanged) - sw.add(self.suggestion_list_view) - - # ---Buttons---#000000#FFFFFF---------------------------------------------------- - button_box = gtk.VButtonBox() - hbox.pack_start(button_box, False, False) - - # Ignore - button = gtk.Button("Ignore") - button.connect("clicked", self._onIgnore) - button.add_accelerator("activate", accel_group, - gtk.keysyms.Return, 0, gtk.ACCEL_VISIBLE) - button_box.pack_start(button) - conditional(button) - - # Ignore all - button = gtk.Button("Ignore All") - button.connect("clicked", self._onIgnoreAll) - button_box.pack_start(button) - conditional(button) - - # Replace - button = gtk.Button("Replace") - button.connect("clicked", self._onReplace) - button_box.pack_start(button) - conditional(button) - - # Replace all - button = gtk.Button("Replace All") -
button.connect("clicked", self._onReplaceAll) - button_box.pack_start(button) - conditional(button) - - # Recheck button - button = gtk.Button("_Add") - button.connect("clicked", self._onAdd) - - button_box.pack_start(button) - conditional(button) - - # Close button - button = gtk.Button(stock=gtk.STOCK_CLOSE) - button.connect("clicked", self._onClose) - button.add_accelerator("activate", accel_group, - gtk.keysyms.Escape, 0, gtk.ACCEL_VISIBLE) - button_box.pack_end(button) - - # dictionary label - self._dict_lable = gtk.Label('') - mainbox.pack_start(self._dict_lable, False, False, padding=5) - - mainbox.show_all() - - def _onIgnore(self, w, *args): - printf(["ignore"]) - self._advance() - - def _onIgnoreAll(self, w, *args): - printf(["ignore all"]) - self._checker.ignore_always() - self._advance() - - def _onReplace(self, *args): - printf(["Replace"]) - repl = self._getRepl() - self._checker.replace(repl) - self._advance() - - def _onReplaceAll(self, *args): - printf(["Replace all"]) - repl = self._getRepl() - self._checker.replace_always(repl) - self._advance() - - def _onAdd(self, *args): - """Callback for the "add" button.""" - self._checker.add() - self._advance() - - def _onClose(self, w, *args): - self.emit('delete_event', gtk.gdk.Event(gtk.gdk.BUTTON_PRESS)) - return True - - def _onButtonPress(self, widget, event): - if event.type == gtk.gdk._2BUTTON_PRESS: - printf(["Double click!"]) - self._onReplace() - - def _onSuggestionChanged(self, widget, *args): - selection = self.suggestion_list_view.get_selection() - model, iter = selection.get_selected() - if iter: - suggestion = model.get_value(iter, COLUMN_SUGGESTION) - self.replace_text.set_text(suggestion) - - def _getRepl(self): - """Get the chosen replacement string.""" - repl = self.replace_text.get_text() - repl = self._checker.coerce_string(repl) - return repl - - def _fillSuggestionList(self, suggestions): - model = self.suggestion_list_view.get_model() - model.clear() - for suggestion in suggestions: - value = unicode("%s" % (suggestion,)) - model.append([value, ]) - - def setSpellChecker(self, checker): - assert checker, 'checker cant be None' - self._checker = checker - self._dict_lable.set_text('Dictionary:%s' % (checker.dict.tag,)) - - def getSpellChecker(self, checker): - return self._checker - - def updateUI(self): - self._advance() - - def _disableButtons(self): - for w in self._conditional_widgets: - w.set_sensitive(False) - - def _enableButtons(self): - for w in self._conditional_widgets: - w.set_sensitive(True) - - def _advance(self): - """Advance to the next error. - This method advances the SpellChecker to the next error, if - any. It then displays the error and some surrounding context, - and well as listing the suggested replacements. 
- """ - # Disable interaction if no checker - if self._checker is None: - self._disableButtons() - self.emit('check-done') - return - - # Advance to next error, disable if not available - try: - self._checker.next() - except StopIteration: - self._disableButtons() - self.error_text.set_text("") - self._fillSuggestionList([]) - self.replace_text.set_text("") - return - self._enableButtons() - - # Display error context with erroneous word in red - self.error_text.set_text('') - iter = self.error_text.get_iter_at_offset(0) - append = self.error_text.insert_with_tags_by_name - - lContext = self._checker.leading_context(self._numContext) - tContext = self._checker.trailing_context(self._numContext) - append(iter, lContext, 'fg_black') - append(iter, self._checker.word, 'fg_red') - append(iter, tContext, 'fg_black') - - # Display suggestions in the replacements list - suggs = self._checker.suggest() - self._fillSuggestionList(suggs) - if suggs: - self.replace_text.set_text(suggs[0]) - else: - self.replace_text.set_text("") - - -def _test(): - from enchant.checker import SpellChecker - text = "This is sme text with a fw speling errors in it. Here are a fw more to tst it ut." - printf(["BEFORE:", text]) - chk_dlg = GtkSpellCheckerDialog() - chk_dlg.show() - chk_dlg.connect('delete_event', gtk.main_quit) - - chkr = SpellChecker("en_US", text) - - chk_dlg.setSpellChecker(chkr) - chk_dlg.updateUI() - gtk.main() - - -if __name__ == "__main__": - _test() diff --git a/PACK/enchant/checker/__init__.py b/PACK/enchant/checker/__init__.py deleted file mode 100644 index 304c0aa..0000000 --- a/PACK/enchant/checker/__init__.py +++ /dev/null @@ -1,379 +0,0 @@ -# pyenchant -# -# Copyright (C) 2004-2008, Ryan Kelly -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., 59 Temple Place - Suite 330, -# Boston, MA 02111-1307, USA. -# -# In addition, as a special exception, you are -# given permission to link the code of this program with -# non-LGPL Spelling Provider libraries (eg: a MSFT Office -# spell checker backend) and distribute linked combinations including -# the two. You must obey the GNU Lesser General Public License in all -# respects for all of the code used other than said providers. If you modify -# this file, you may extend this exception to your version of the -# file, but you are not obligated to do so. If you do not wish to -# do so, delete this exception statement from your version. -# -""" - -enchant.checker: High-level spellchecking functionality -======================================================== - -This package is designed to host higher-level spellchecking functionality -than is available in the base enchant package. It should make writing -applications that follow common usage idioms significantly easier. - -The most useful class is SpellChecker, which implements a spellchecking -loop over a block of text. 
It is capable of modifying the text in-place
-if given an array of characters to work with.
-
-This package also contains several interfaces to the SpellChecker class,
-such as a wxPython GUI dialog and a command-line interface.
-
-"""
-
-import array
-import warnings
-
-import enchant
-from enchant.errors import *
-from enchant.tokenize import get_tokenizer
-from enchant.utils import bytes, unicode, basestring, next
-from enchant.utils import get_default_language
-
-
-class SpellChecker:
-    """Class implementing stateful spellchecking behaviour.
-
-    This class is designed to implement a spell-checking loop over
-    a block of text, correcting/ignoring/replacing words as required.
-    This loop is implemented using an iterator paradigm so it can be
-    embedded inside other loops of control.
-
-    The SpellChecker object is stateful, and the appropriate methods
-    must be called to alter its state and affect the progress of
-    the spell checking session. At any point during the checking
-    session, the attribute 'word' will hold the current erroneously
-    spelled word under consideration. The action to take on this word
-    is determined by calling methods such as 'replace', 'replace_always'
-    and 'ignore_always'. Once this is done, calling 'next' advances
-    to the next misspelled word.
-
-    As a quick (and rather silly) example, the following code replaces
-    each misspelled word with the string "SPAM":
-
-        >>> text = "This is sme text with a fw speling errors in it."
-        >>> chkr = SpellChecker("en_US",text)
-        >>> for err in chkr:
-        ...     err.replace("SPAM")
-        ...
-        >>> chkr.get_text()
-        'This is SPAM text with a SPAM SPAM errors in it.'
-        >>>
-
-    Internally, the SpellChecker always works with arrays of (possibly
-    unicode) character elements. This allows the in-place modification
-    of the string as it is checked, and is the closest thing Python has
-    to a mutable string. The text can be set as any of a normal string,
-    unicode string, character array or unicode character array. The
-    'get_text' method will return the modified array object if an
-    array is used, or a new string object if a string is used.
-
-    Words input to the SpellChecker may be either plain strings or
-    unicode objects. They will be converted to the same type as the
-    text being checked, using python's default encoding/decoding
-    settings.
-
-    If using an array of characters with this object and the
-    array is modified outside of the spellchecking loop, use the
-    'set_offset' method to reposition the internal loop pointer
-    to make sure it doesn't skip any words.
-
-    """
-    _DOC_ERRORS = ["sme", "fw", "speling", "chkr", "chkr", "chkr"]
-
-    def __init__(self, lang=None, text=None, tokenize=None, chunkers=None, filters=None):
-        """Constructor for the SpellChecker class.
-
-        SpellChecker objects can be created in two ways, depending on
-        the nature of the first argument. If it is a string, it
-        specifies a language tag from which a dictionary is created.
-        Otherwise, it must be an enchant Dict object to be used.
-
-        Optional keyword arguments are:
-
-            * text: to set the text to be checked at creation time
-            * tokenize: a custom tokenization function to use
-            * chunkers: a list of chunkers to apply during tokenization
-            * filters: a list of filters to apply during tokenization
-
-        If 'tokenize' is not given and the first argument is a Dict,
-        its 'tag' attribute must be a language tag so that a tokenization
-        function can be created automatically. If this attribute is missing
-        the user's default language will be used.
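-
-        For example, a checker that skips HTML markup and WikiWords
-        (a sketch mirroring the usage in enchant.checker.tests further
-        down this diff)::
-
-            chkr = SpellChecker("en_US", text="some text",
-                                chunkers=[enchant.tokenize.HTMLChunker],
-                                filters=[enchant.tokenize.WikiWordFilter])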
- """ - if lang is None: - lang = get_default_language() - if isinstance(lang, basestring): - dict = enchant.Dict(lang) - else: - dict = lang - try: - lang = dict.tag - except AttributeError: - lang = get_default_language() - if lang is None: - raise DefaultLanguageNotFoundError - self.lang = lang - self.dict = dict - if tokenize is None: - try: - tokenize = get_tokenizer(lang, chunkers, filters) - except TokenizerNotFoundError: - # Fall back to default tokenization if no match for 'lang' - tokenize = get_tokenizer(None, chunkers, filters) - self._tokenize = tokenize - - self.word = None - self.wordpos = None - self._ignore_words = {} - self._replace_words = {} - # Default to the empty string as the text to be checked - self._text = array.array('u') - self._use_tostring = False - self._tokens = iter([]) - - if text is not None: - self.set_text(text) - - def __iter__(self): - """Each SpellChecker object is its own iterator""" - return self - - def set_text(self, text): - """Set the text to be spell-checked. - - This method must be called, or the 'text' argument supplied - to the constructor, before calling the 'next()' method. - """ - # Convert to an array object if necessary - if isinstance(text, basestring): - if type(text) is unicode: - self._text = array.array('u', text) - else: - self._text = array.array('c', text) - self._use_tostring = True - else: - self._text = text - self._use_tostring = False - self._tokens = self._tokenize(self._text) - - def get_text(self): - """Return the spell-checked text.""" - if self._use_tostring: - return self._array_to_string(self._text) - return self._text - - def _array_to_string(self, text): - """Format an internal array as a standard string.""" - if text.typecode == 'u': - return text.tounicode() - return text.tostring() - - def wants_unicode(self): - """Check whether the checker wants unicode strings. - - This method will return True if the checker wants unicode strings - as input, False if it wants normal strings. It's important to - provide the correct type of string to the checker. - """ - if self._text.typecode == 'u': - return True - return False - - def coerce_string(self, text, enc=None): - """Coerce string into the required type. - - This method can be used to automatically ensure that strings - are of the correct type required by this checker - either unicode - or standard. If there is a mismatch, conversion is done using - python's default encoding unless another encoding is specified. - """ - if self.wants_unicode(): - if not isinstance(text, unicode): - if enc is None: - return text.decode() - else: - return text.decode(enc) - return text - if not isinstance(text, bytes): - if enc is None: - return text.encode() - else: - return text.encode(enc) - return text - - def __next__(self): - return self.next() - - def next(self): - """Process text up to the next spelling error. - - This method is designed to support the iterator protocol. - Each time it is called, it will advance the 'word' attribute - to the next spelling error in the text. When no more errors - are found, it will raise StopIteration. - - The method will always return self, so that it can be used - sensibly in common idioms such as: - - for err in checker: - err.do_something() - - """ - # Find the next spelling error. 
-        # The uncaught StopIteration from next(self._tokens)
-        # will provide the StopIteration for this method
-        while True:
-            (word, pos) = next(self._tokens)
-            # decode back to a regular string
-            word = self._array_to_string(word)
-            if self.dict.check(word):
-                continue
-            if word in self._ignore_words:
-                continue
-            self.word = word
-            self.wordpos = pos
-            if word in self._replace_words:
-                self.replace(self._replace_words[word])
-                continue
-            break
-        return self
-
-    def replace(self, repl):
-        """Replace the current erroneous word with the given string."""
-        repl = self.coerce_string(repl)
-        aRepl = array.array(self._text.typecode, repl)
-        if repl:
-            self.dict.store_replacement(self.word, repl)
-        self._text[self.wordpos:self.wordpos + len(self.word)] = aRepl
-        incr = len(repl) - len(self.word)
-        self._tokens.set_offset(self._tokens.offset + incr, replaced=True)
-
-    def replace_always(self, word, repl=None):
-        """Always replace given word with given replacement.
-
-        If a single argument is given, this is used to replace the
-        current erroneous word. If two arguments are given, that
-        combination is added to the list for future use.
-        """
-        if repl is None:
-            repl = word
-            word = self.word
-        repl = self.coerce_string(repl)
-        word = self.coerce_string(word)
-        self._replace_words[word] = repl
-        if self.word == word:
-            self.replace(repl)
-
-    def ignore_always(self, word=None):
-        """Add given word to list of words to ignore.
-
-        If no word is given, the current erroneous word is added.
-        """
-        if word is None:
-            word = self.word
-        word = self.coerce_string(word)
-        if word not in self._ignore_words:
-            self._ignore_words[word] = True
-
-    def add_to_personal(self, word=None):
-        """Add given word to the personal word list.
-
-        If no word is given, the current erroneous word is added.
-        """
-        warnings.warn("SpellChecker.add_to_personal is deprecated, " \
-                      "please use SpellChecker.add",
-                      category=DeprecationWarning, stacklevel=2)
-        self.add(word)
-
-    def add(self, word=None):
-        """Add given word to the personal word list.
-
-        If no word is given, the current erroneous word is added.
-        """
-        if word is None:
-            word = self.word
-        self.dict.add(word)
-
-    def suggest(self, word=None):
-        """Return suggested spellings for the given word.
-
-        If no word is given, the current erroneous word is used.
-        """
-        if word is None:
-            word = self.word
-        suggs = self.dict.suggest(word)
-        return suggs
-
-    def check(self, word):
-        """Check correctness of the given word."""
-        return self.dict.check(word)
-
-    def set_offset(self, off, whence=0):
-        """Set the offset of the tokenization routine.
-
-        For more details on the purpose of the tokenization offset,
-        see the documentation of the 'enchant.tokenize' module.
-        The optional argument whence indicates the method by
-        which to change the offset:
-            * 0 (the default) treats 'off' as an increment
-            * 1 treats 'off' as a distance from the start
-            * 2 treats 'off' as a distance from the end
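-
-        For example, to step back and re-check the current word after
-        editing the text externally (a sketch based on the usage in
-        enchant.checker.tests)::
-
-            chkr.set_offset(-1 * len(chkr.word))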
- """ - start = max(self.wordpos - chars, 0) - context = self._text[start:self.wordpos] - return self._array_to_string(context) - - def trailing_context(self, chars): - """Get characters of trailing context. - - This method returns up to characters of trailing - context - the text that occurs in the string immediately - after the current erroneous word. - """ - start = self.wordpos + len(self.word) - end = min(start + chars, len(self._text)) - context = self._text[start:end] - return self._array_to_string(context) diff --git a/PACK/enchant/checker/tests.py b/PACK/enchant/checker/tests.py deleted file mode 100644 index e99b552..0000000 --- a/PACK/enchant/checker/tests.py +++ /dev/null @@ -1,246 +0,0 @@ -# pyenchant -# -# Copyright (C) 2004-2009, Ryan Kelly -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., 59 Temple Place - Suite 330, -# Boston, MA 02111-1307, USA. -# -# In addition, as a special exception, you are -# given permission to link the code of this program with -# non-LGPL Spelling Provider libraries (eg: a MSFT Office -# spell checker backend) and distribute linked combinations including -# the two. You must obey the GNU Lesser General Public License in all -# respects for all of the code used other than said providers. If you modify -# this file, you may extend this exception to your version of the -# file, but you are not obligated to do so. If you do not wish to -# do so, delete this exception statement from your version. -# -""" - - enchant.checker.tests: Unittests for enchant SpellChecker class - -""" - -import unittest - -import enchant -import enchant.tokenize -from enchant.utils import * -from enchant.errors import * -from enchant.checker import * - - -class TestChecker(unittest.TestCase): - """TestCases for checking behaviour of SpellChecker class.""" - - def test_basic(self): - """Test a basic run of the SpellChecker class.""" - text = """This is sme text with a few speling erors in it. Its gret - for checking wheather things are working proprly with the SpellChecker - class. 
diff --git a/PACK/enchant/checker/tests.py b/PACK/enchant/checker/tests.py
deleted file mode 100644
index e99b552..0000000
--- a/PACK/enchant/checker/tests.py
+++ /dev/null
@@ -1,246 +0,0 @@
-# pyenchant
-#
-# Copyright (C) 2004-2009, Ryan Kelly
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-# Boston, MA 02111-1307, USA.
-#
-# In addition, as a special exception, you are
-# given permission to link the code of this program with
-# non-LGPL Spelling Provider libraries (eg: a MSFT Office
-# spell checker backend) and distribute linked combinations including
-# the two. You must obey the GNU Lesser General Public License in all
-# respects for all of the code used other than said providers. If you modify
-# this file, you may extend this exception to your version of the
-# file, but you are not obligated to do so. If you do not wish to
-# do so, delete this exception statement from your version.
-#
-"""
-
-    enchant.checker.tests: Unittests for enchant SpellChecker class
-
-"""
-
-import unittest
-
-import enchant
-import enchant.tokenize
-from enchant.utils import *
-from enchant.errors import *
-from enchant.checker import *
-
-
-class TestChecker(unittest.TestCase):
-    """TestCases for checking behaviour of SpellChecker class."""
-
-    def test_basic(self):
-        """Test a basic run of the SpellChecker class."""
-        text = """This is sme text with a few speling erors in it. Its gret
-        for checking wheather things are working proprly with the SpellChecker
-        class. Not gret for much elss though."""
-        chkr = SpellChecker("en_US", text=text)
-        for n, err in enumerate(chkr):
-            if n == 0:
-                # Fix up "sme" -> "some" properly
-                self.assertEqual(err.word, "sme")
-                self.assertEqual(err.wordpos, 8)
-                self.assertTrue("some" in err.suggest())
-                err.replace("some")
-            if n == 1:
-                # Ignore "speling"
-                self.assertEqual(err.word, "speling")
-            if n == 2:
-                # Check context around "erors", and replace
-                self.assertEqual(err.word, "erors")
-                self.assertEqual(err.leading_context(5), "ling ")
-                self.assertEqual(err.trailing_context(5), " in i")
-                err.replace(raw_unicode("errors"))
-            if n == 3:
-                # Replace-all on "gret" as it appears twice
-                self.assertEqual(err.word, "gret")
-                err.replace_always("great")
-            if n == 4:
-                # First encounter with "wheather", move offset back
-                self.assertEqual(err.word, "wheather")
-                err.set_offset(-1 * len(err.word))
-            if n == 5:
-                # Second encounter, fix up "wheather"
-                self.assertEqual(err.word, "wheather")
-                err.replace("whether")
-            if n == 6:
-                # Just replace "proprly", but also add an ignore
-                # for "SpellChecker"
-                self.assertEqual(err.word, "proprly")
-                err.replace("properly")
-                err.ignore_always("SpellChecker")
-            if n == 7:
-                # The second "gret" should have been replaced
-                # So it's now on "elss"
-                self.assertEqual(err.word, "elss")
-                err.replace("else")
-            if n > 7:
-                self.fail("Extraneous spelling errors were found")
-        text2 = """This is some text with a few speling errors in it. Its great
-        for checking whether things are working properly with the SpellChecker
-        class. Not great for much else though."""
-        self.assertEqual(chkr.get_text(), text2)
-
-    def test_filters(self):
-        """Test SpellChecker with the 'filters' argument."""
-        text = """I contain WikiWords that ShouldBe skipped by the filters"""
-        chkr = SpellChecker("en_US", text=text,
-                            filters=[enchant.tokenize.WikiWordFilter])
-        for err in chkr:
-            # There are no errors once the WikiWords are skipped
-            self.fail("Extraneous spelling errors were found")
-        self.assertEqual(chkr.get_text(), text)
-
-    def test_chunkers(self):
-        """Test SpellChecker with the 'chunkers' argument."""
-        text = """I contain tags that should be skipped"""
-        chkr = SpellChecker("en_US", text=text,
-                            chunkers=[enchant.tokenize.HTMLChunker])
-        for err in chkr:
-            # There are no errors when the tag is skipped
-            self.fail("Extraneous spelling errors were found")
-        self.assertEqual(chkr.get_text(), text)
-
-    def test_chunkers_and_filters(self):
-        """Test SpellChecker with the 'chunkers' and 'filters' arguments."""
-        text = """I contain tags that should be skipped
-        along with a

-    >>> dlg = wxSpellCheckerDialog(None,-1,"")
-    >>> chkr = SpellChecker("en_AU",text)
-    >>> dlg.SetSpellChecker(chkr)
-    >>> dlg.Show()
-
-    This is most useful when the text to be checked is in the form of
-    a character array, as it will be modified in place as the user
-    interacts with the dialog. For checking strings, the final result
-    will need to be obtained from the SpellChecker object:
-
-    >>> dlg = wxSpellCheckerDialog(None,-1,"")
-    >>> chkr = SpellChecker("en_AU",text)
-    >>> dlg.SetSpellChecker(chkr)
-    >>> dlg.ShowModal()
-    >>> text = dlg.GetSpellChecker().get_text()
-
-    Currently the checker must deal with strings of the same type as
-    returned by wxPython - unicode or normal string depending on the
-    underlying system. This needs to be fixed, somehow...
- """ - _DOC_ERRORS = ["dlg", "chkr", "dlg", "SetSpellChecker", "chkr", "dlg", - "dlg", "chkr", "dlg", "SetSpellChecker", "chkr", "dlg", - "ShowModal", "dlg", "GetSpellChecker"] - - # Remember dialog size across invocations by storing it on the class - sz = (300, 70) - - def __init__(self, parent=None, id=-1, title="Checking Spelling..."): - wx.Dialog.__init__(self, parent, id, title, size=wxSpellCheckerDialog.sz, - style=wx.DEFAULT_DIALOG_STYLE | wx.RESIZE_BORDER) - self._numContext = 40 - self._checker = None - self._buttonsEnabled = True - self.error_text = wx.TextCtrl(self, -1, "", style=wx.TE_MULTILINE | wx.TE_READONLY | wx.TE_RICH) - self.replace_text = wx.TextCtrl(self, -1, "", style=wx.TE_PROCESS_ENTER) - self.replace_list = wx.ListBox(self, -1, style=wx.LB_SINGLE) - self.InitLayout() - wx.EVT_LISTBOX(self, self.replace_list.GetId(), self.OnReplSelect) - wx.EVT_LISTBOX_DCLICK(self, self.replace_list.GetId(), self.OnReplace) - - def InitLayout(self): - """Lay out controls and add buttons.""" - sizer = wx.BoxSizer(wx.HORIZONTAL) - txtSizer = wx.BoxSizer(wx.VERTICAL) - btnSizer = wx.BoxSizer(wx.VERTICAL) - replaceSizer = wx.BoxSizer(wx.HORIZONTAL) - txtSizer.Add(wx.StaticText(self, -1, "Unrecognised Word:"), 0, wx.LEFT | wx.TOP, 5) - txtSizer.Add(self.error_text, 1, wx.ALL | wx.EXPAND, 5) - replaceSizer.Add(wx.StaticText(self, -1, "Replace with:"), 0, wx.ALL | wx.ALIGN_CENTER_VERTICAL, 5) - replaceSizer.Add(self.replace_text, 1, wx.ALL | wx.ALIGN_CENTER_VERTICAL, 5) - txtSizer.Add(replaceSizer, 0, wx.EXPAND, 0) - txtSizer.Add(self.replace_list, 2, wx.ALL | wx.EXPAND, 5) - sizer.Add(txtSizer, 1, wx.EXPAND, 0) - self.buttons = [] - for label, action, tip in ( \ - ("Ignore", self.OnIgnore, "Ignore this word and continue"), - ("Ignore All", self.OnIgnoreAll, "Ignore all instances of this word and continue"), - ("Replace", self.OnReplace, "Replace this word"), - ("Replace All", self.OnReplaceAll, "Replace all instances of this word"), - ("Add", self.OnAdd, "Add this word to the dictionary"), - ("Done", self.OnDone, "Finish spell-checking and accept changes"), - ): - btn = wx.Button(self, -1, label) - btn.SetToolTip(wx.ToolTip(tip)) - btnSizer.Add(btn, 0, wx.ALIGN_RIGHT | wx.ALL, 4) - btn.Bind(wx.EVT_BUTTON, action) - self.buttons.append(btn) - sizer.Add(btnSizer, 0, wx.ALL | wx.EXPAND, 5) - self.SetAutoLayout(True) - self.SetSizer(sizer) - sizer.Fit(self) - - def Advance(self): - """Advance to the next error. - - This method advances the SpellChecker to the next error, if - any. It then displays the error and some surrounding context, - and well as listing the suggested replacements. - """ - # Disable interaction if no checker - if self._checker is None: - self.EnableButtons(False) - return False - # Advance to next error, disable if not available - try: - self._checker.next() - except StopIteration: - self.EnableButtons(False) - self.error_text.SetValue("") - self.replace_list.Clear() - self.replace_text.SetValue("") - if self.IsModal(): # test needed for SetSpellChecker call - # auto-exit when checking complete - self.EndModal(wx.ID_OK) - return False - self.EnableButtons() - # Display error context with erroneous word in red. - # Restoring default style was misbehaving under win32, so - # I am forcing the rest of the text to be black. 
- self.error_text.SetValue("") - self.error_text.SetDefaultStyle(wx.TextAttr(wx.BLACK)) - lContext = self._checker.leading_context(self._numContext) - self.error_text.AppendText(lContext) - self.error_text.SetDefaultStyle(wx.TextAttr(wx.RED)) - self.error_text.AppendText(self._checker.word) - self.error_text.SetDefaultStyle(wx.TextAttr(wx.BLACK)) - tContext = self._checker.trailing_context(self._numContext) - self.error_text.AppendText(tContext) - # Display suggestions in the replacements list - suggs = self._checker.suggest() - self.replace_list.Set(suggs) - self.replace_text.SetValue(suggs and suggs[0] or '') - return True - - def EnableButtons(self, state=True): - """Enable the checking-related buttons""" - if state != self._buttonsEnabled: - for btn in self.buttons[:-1]: - btn.Enable(state) - self._buttonsEnabled = state - - def GetRepl(self): - """Get the chosen replacement string.""" - repl = self.replace_text.GetValue() - return repl - - def OnAdd(self, evt): - """Callback for the "add" button.""" - self._checker.add() - self.Advance() - - def OnDone(self, evt): - """Callback for the "close" button.""" - wxSpellCheckerDialog.sz = self.error_text.GetSizeTuple() - if self.IsModal(): - self.EndModal(wx.ID_OK) - else: - self.Close() - - def OnIgnore(self, evt): - """Callback for the "ignore" button. - This simply advances to the next error. - """ - self.Advance() - - def OnIgnoreAll(self, evt): - """Callback for the "ignore all" button.""" - self._checker.ignore_always() - self.Advance() - - def OnReplace(self, evt): - """Callback for the "replace" button.""" - repl = self.GetRepl() - if repl: - self._checker.replace(repl) - self.Advance() - - def OnReplaceAll(self, evt): - """Callback for the "replace all" button.""" - repl = self.GetRepl() - self._checker.replace_always(repl) - self.Advance() - - def OnReplSelect(self, evt): - """Callback when a new replacement option is selected.""" - sel = self.replace_list.GetSelection() - if sel == -1: - return - opt = self.replace_list.GetString(sel) - self.replace_text.SetValue(opt) - - def GetSpellChecker(self): - """Get the spell checker object.""" - return self._checker - - def SetSpellChecker(self, chkr): - """Set the spell checker, advancing to the first error. - Return True if error(s) to correct, else False.""" - self._checker = chkr - return self.Advance() - - -def _test(): - class TestDialog(wxSpellCheckerDialog): - def __init__(self, *args): - wxSpellCheckerDialog.__init__(self, *args) - wx.EVT_CLOSE(self, self.OnClose) - - def OnClose(self, evnt): - chkr = dlg.GetSpellChecker() - if chkr is not None: - printf(["AFTER:", chkr.get_text()]) - self.Destroy() - - from enchant.checker import SpellChecker - text = "This is sme text with a fw speling errors in it. Here are a fw more to tst it ut." - printf(["BEFORE:", text]) - app = wx.PySimpleApp() - dlg = TestDialog() - chkr = SpellChecker("en_US", text) - dlg.SetSpellChecker(chkr) - dlg.Show() - app.MainLoop() - - -if __name__ == "__main__": - _test() diff --git a/PACK/enchant/errors.py b/PACK/enchant/errors.py deleted file mode 100644 index cd6930c..0000000 --- a/PACK/enchant/errors.py +++ /dev/null @@ -1,57 +0,0 @@ -# pyenchant -# -# Copyright (C) 2004-2008, Ryan Kelly -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. 
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-# Boston, MA 02111-1307, USA.
-#
-# In addition, as a special exception, you are
-# given permission to link the code of this program with
-# non-LGPL Spelling Provider libraries (eg: a MSFT Office
-# spell checker backend) and distribute linked combinations including
-# the two. You must obey the GNU Lesser General Public License in all
-# respects for all of the code used other than said providers. If you modify
-# this file, you may extend this exception to your version of the
-# file, but you are not obligated to do so. If you do not wish to
-# do so, delete this exception statement from your version.
-#
-"""
-enchant.errors: Error class definitions for the enchant library
-================================================================
-
-All error classes are defined in this separate sub-module, so that they
-can safely be imported without causing circular dependencies.
-
-"""
-
-
-class Error(Exception):
-    """Base exception class for the enchant module."""
-    pass
-
-
-class DictNotFoundError(Error):
-    """Exception raised when a requested dictionary could not be found."""
-    pass
-
-
-class TokenizerNotFoundError(Error):
-    """Exception raised when a requested tokenizer could not be found."""
-    pass
-
-
-class DefaultLanguageNotFoundError(Error):
-    """Exception raised when a default language could not be found."""
-    pass
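Since every class above subclasses Error, callers can catch broadly or narrowly. A small illustrative sketch (the language tag is hypothetical):

    import enchant
    from enchant.errors import DictNotFoundError

    try:
        d = enchant.Dict("xx_XX")  # no dictionary for this tag
    except DictNotFoundError:
        d = None  # fall back gracefully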
diff --git a/PACK/enchant/lib/enchant/README.txt b/PACK/enchant/lib/enchant/README.txt
deleted file mode 100644
index 1943a29..0000000
--- a/PACK/enchant/lib/enchant/README.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-
-This directory contains the plugin DLLs for enchant when installed on
-a Microsoft Windows system.
-
diff --git a/PACK/enchant/pypwl.py b/PACK/enchant/pypwl.py
deleted file mode 100644
index 9356ca9..0000000
--- a/PACK/enchant/pypwl.py
+++ /dev/null
@@ -1,285 +0,0 @@
-# pyenchant
-#
-# Copyright (C) 2004-2011 Ryan Kelly
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-# Boston, MA 02111-1307, USA.
-#
-# In addition, as a special exception, you are
-# given permission to link the code of this program with
-# non-LGPL Spelling Provider libraries (eg: a MSFT Office
-# spell checker backend) and distribute linked combinations including
-# the two. You must obey the GNU Lesser General Public License in all
-# respects for all of the code used other than said providers. If you modify
-# this file, you may extend this exception to your version of the
-# file, but you are not obligated to do so. If you do not wish to
-# do so, delete this exception statement from your version.
-#
-"""
-
-pypwl: pure-python personal word list in the style of Enchant
-==============================================================
-
-This module provides a pure-python version of the personal word list
-functionality found in the spellchecking package Enchant. While the
-same effect can be achieved (with better performance) using the python
-bindings for Enchant, it requires a C extension.
-
-This pure-python implementation uses the same algorithm but without any
-external dependencies or C code (in fact, it was the author's original
-prototype for the C version found in Enchant).
-
-"""
-
-from __future__ import generators
-
-import os
-import warnings
-
-
-class Trie:
-    """Class implementing a trie-based dictionary of words.
-
-    A Trie is a recursive data structure storing words by their prefix.
-    "Fuzzy matching" can be done by allowing a certain number of missteps
-    when traversing the Trie.
-    """
-
-    def __init__(self, words=()):
-        self._eos = False  # whether I am the end of a word
-        self._keys = {}  # letters at this level of the trie
-        for w in words:
-            self.insert(w)
-
-    def insert(self, word):
-        if word == "":
-            self._eos = True
-        else:
-            key = word[0]
-            try:
-                subtrie = self[key]
-            except KeyError:
-                subtrie = Trie()
-                self[key] = subtrie
-            subtrie.insert(word[1:])
-
-    def remove(self, word):
-        if word == "":
-            self._eos = False
-        else:
-            key = word[0]
-            try:
-                subtrie = self[key]
-            except KeyError:
-                pass
-            else:
-                subtrie.remove(word[1:])
-
-    def search(self, word, nerrs=0):
-        """Search for the given word, possibly making errors.
-
-        This method searches the trie for the given 'word', making
-        precisely 'nerrs' errors. It returns a list of words found.
-        """
-        res = []
-        # Terminate if we've run out of errors
-        if nerrs < 0:
-            return res
-        # Precise match at the end of the word
-        if nerrs == 0 and word == "":
-            if self._eos:
-                res.append("")
-        # Precisely match word[0]
-        try:
-            subtrie = self[word[0]]
-            subres = subtrie.search(word[1:], nerrs)
-            for w in subres:
-                w2 = word[0] + w
-                if w2 not in res:
-                    res.append(w2)
-        except (IndexError, KeyError):
-            pass
-        # match with deletion of word[0]
-        try:
-            subres = self.search(word[1:], nerrs - 1)
-            for w in subres:
-                if w not in res:
-                    res.append(w)
-        except (IndexError,):
-            pass
-        # match with insertion before word[0]
-        try:
-            for k in self._keys:
-                subres = self[k].search(word, nerrs - 1)
-                for w in subres:
-                    w2 = k + w
-                    if w2 not in res:
-                        res.append(w2)
-        except (IndexError, KeyError):
-            pass
-        # match on substitution of word[0]
-        try:
-            for k in self._keys:
-                subres = self[k].search(word[1:], nerrs - 1)
-                for w in subres:
-                    w2 = k + w
-                    if w2 not in res:
-                        res.append(w2)
-        except (IndexError, KeyError):
-            pass
-        # All done!
-        return res
-
-    search._DOC_ERRORS = ["nerrs"]
-
-    def __getitem__(self, key):
-        return self._keys[key]
-
-    def __setitem__(self, key, val):
-        self._keys[key] = val
-
-    def __iter__(self):
-        if self._eos:
-            yield ""
-        for k in self._keys:
-            for w2 in self._keys[k]:
-                yield k + w2
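-
-# An illustrative sketch of the fuzzy search (not from the original
-# file; result ordering may vary):
-#
-#     t = Trie(["hello", "help"])
-#     t.search("hello")          # -> ['hello']  (exact match)
-#     t.search("helo", nerrs=1)  # -> ['hello', 'help']  (one misstep)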
-
-class PyPWL:
-    """Pure-python implementation of Personal Word List dictionary.
-    This class emulates the PWL objects provided by PyEnchant, but
-    implemented purely in python.
-    """
-
-    def __init__(self, pwl=None):
-        """PyPWL constructor.
-        This method takes as its only argument the name of a file
-        containing the personal word list, one word per line. Entries
-        will be read from this file, and new entries will be written to
-        it automatically.
-
-        If 'pwl' is not specified or None, the list is maintained in
-        memory only.
-        """
-        self.provider = None
-        self._words = Trie()
-        if pwl is not None:
-            self.pwl = os.path.abspath(pwl)
-            self.tag = self.pwl
-            pwlF = file(pwl)
-            for ln in pwlF:
-                word = ln.strip()
-                self.add_to_session(word)
-            pwlF.close()
-        else:
-            self.pwl = None
-            self.tag = "PyPWL"
-
-    def check(self, word):
-        """Check spelling of a word.
-
-        This method takes a word in the dictionary language and returns
-        True if it is correctly spelled, and False otherwise.
-        """
-        res = self._words.search(word)
-        return bool(res)
-
-    def suggest(self, word):
-        """Suggest possible spellings for a word.
-
-        This method tries to guess the correct spelling for a given
-        word, returning the possibilities in a list.
-        """
-        limit = 10
-        maxdepth = 5
-        # Iterative deepening until we get enough matches
-        depth = 0
-        res = self._words.search(word, depth)
-        while len(res) < limit and depth < maxdepth:
-            depth += 1
-            for w in self._words.search(word, depth):
-                if w not in res:
-                    res.append(w)
-        # Limit number of suggs
-        return res[:limit]
-
-    def add(self, word):
-        """Add a word to the user's personal dictionary.
-        For a PWL, this means appending it to the file.
-        """
-        if self.pwl is not None:
-            pwlF = file(self.pwl, "a")
-            pwlF.write("%s\n" % (word.strip(),))
-            pwlF.close()
-        self.add_to_session(word)
-
-    def add_to_pwl(self, word):
-        """Add a word to the user's personal dictionary.
-        For a PWL, this means appending it to the file.
-        """
-        warnings.warn("PyPWL.add_to_pwl is deprecated, please use PyPWL.add",
-                      category=DeprecationWarning, stacklevel=2)
-        self.add(word)
-
-    def remove(self, word):
-        """Add a word to the user's personal exclude list."""
-        # There's no exclude list for a stand-alone PWL.
-        # Just remove it from the list.
-        self._words.remove(word)
-        if self.pwl is not None:
-            pwlF = file(self.pwl, "wt")
-            for w in self._words:
-                pwlF.write("%s\n" % (w.strip(),))
-            pwlF.close()
-
-    def add_to_session(self, word):
-        """Add a word to the session list."""
-        self._words.insert(word)
-
-    def is_in_session(self, word):
-        """Check whether a word is in the session list."""
-        warnings.warn("PyPWL.is_in_session is deprecated, please use PyPWL.is_added", category=DeprecationWarning)
-        # Consider all words to be in the session list
-        return self.check(word)
-
-    def store_replacement(self, mis, cor):
-        """Store a replacement spelling for a mis-spelled word.
-
-        This method makes a suggestion to the spellchecking engine that the
-        mis-spelled word 'mis' is in fact correctly spelled as 'cor'. Such
-        a suggestion will typically mean that 'cor' appears early in the
-        list of suggested spellings offered for later instances of 'mis'.
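-
-        For example (illustrative only)::
-
-            pwl.store_replacement("speling", "spelling")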
-        """
-        # Too much work for this simple spellchecker
-        pass
-
-    store_replacement._DOC_ERRORS = ["mis", "mis"]
-
-    def is_added(self, word):
-        """Check whether a word is in the personal word list."""
-        return self.check(word)
-
-    def is_removed(self, word):
-        """Check whether a word is in the personal exclude list."""
-        return False
-
-    # No-op methods to support internal use as a Dict() replacement
-
-    def _check_this(self, msg):
-        pass
-
-    def _free(self):
-        pass
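A quick sketch of using PyPWL directly (illustrative only):

    from enchant.pypwl import PyPWL

    pwl = PyPWL()             # in-memory personal word list
    pwl.add("pyenchant")
    pwl.check("pyenchant")    # -> True
    pwl.suggest("pyenchnt")   # -> ['pyenchant'] via fuzzy trie search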
diff --git a/PACK/enchant/share/enchant/README.txt b/PACK/enchant/share/enchant/README.txt
deleted file mode 100644
index 6d9b66d..0000000
--- a/PACK/enchant/share/enchant/README.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-
-This directory contains dictionary files for Enchant when installed on a
-Microsoft Windows system. Each subdirectory contains dictionaries for
-a particular spellchecking system.
diff --git a/PACK/enchant/share/enchant/ispell/README.txt b/PACK/enchant/share/enchant/ispell/README.txt
deleted file mode 100644
index e69de29..0000000
diff --git a/PACK/enchant/share/enchant/myspell/README.txt b/PACK/enchant/share/enchant/myspell/README.txt
deleted file mode 100644
index ae8145f..0000000
--- a/PACK/enchant/share/enchant/myspell/README.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-
-This directory contains dictionaries for the myspell backend to enchant.
-
diff --git a/PACK/enchant/tests.py b/PACK/enchant/tests.py
deleted file mode 100644
index c067377..0000000
--- a/PACK/enchant/tests.py
+++ /dev/null
@@ -1,616 +0,0 @@
-# pyenchant
-#
-# Copyright (C) 2004-2009, Ryan Kelly
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-# Boston, MA 02111-1307, USA.
-#
-# In addition, as a special exception, you are
-# given permission to link the code of this program with
-# non-LGPL Spelling Provider libraries (eg: a MSFT Office
-# spell checker backend) and distribute linked combinations including
-# the two. You must obey the GNU Lesser General Public License in all
-# respects for all of the code used other than said providers. If you modify
-# this file, you may extend this exception to your version of the
-# file, but you are not obligated to do so. If you do not wish to
-# do so, delete this exception statement from your version.
-#
-"""
-
-    enchant.tests: testcases for pyenchant
-
-"""
-
-import os
-import sys
-import unittest
-import pickle
-
-try:
-    import subprocess
-except ImportError:
-    subprocess = None
-
-import enchant
-from enchant import *
-from enchant import _enchant as _e
-from enchant.utils import unicode, raw_unicode, printf, trim_suggestions
-
-
-def runcmd(cmd):
-    if subprocess is not None:
-        kwds = dict(stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
-        p = subprocess.Popen(cmd, **kwds)
-        (stdout, stderr) = p.communicate()
-        if p.returncode:
-            if sys.version_info[0] >= 3:
-                stderr = stderr.decode(sys.getdefaultencoding(), "replace")
-            sys.stderr.write(stderr)
-        return p.returncode
-    else:
-        return os.system(cmd)
-
-
-class TestBroker(unittest.TestCase):
-    """Test cases for the proper functioning of Broker objects.
-
-    These tests assume that there is at least one working provider
-    with a dictionary for the "en_US" language.
-    """
-
-    def setUp(self):
-        self.broker = Broker()
-
-    def tearDown(self):
-        del self.broker
-
-    def test_HasENUS(self):
-        """Test that the en_US language is available."""
-        self.assertTrue(self.broker.dict_exists("en_US"))
-
-    def test_LangsAreAvail(self):
-        """Test whether all advertised languages are in fact available."""
-        for lang in self.broker.list_languages():
-            if not self.broker.dict_exists(lang):
-                assert False, "language '" + lang + "' advertised but non-existent"
-
-    def test_ProvsAreAvail(self):
-        """Test whether all advertised providers are in fact available."""
-        for (lang, prov) in self.broker.list_dicts():
-            self.assertTrue(self.broker.dict_exists(lang))
-            if not self.broker.dict_exists(lang):
-                assert False, "language '" + lang + "' advertised but non-existent"
-            if prov not in self.broker.describe():
-                assert False, "provider '" + str(prov) + "' advertised but non-existent"
-
-    def test_ProvOrdering(self):
-        """Test that provider ordering works correctly."""
-        langs = {}
-        provs = []
-        # Find the providers for each language, and a list of all providers
-        for (tag, prov) in self.broker.list_dicts():
-            # Skip hyphenation dictionaries installed by OOo
-            if tag.startswith("hyph_") and prov.name == "myspell":
-                continue
-            # Canonicalize separators
-            tag = tag.replace("-", "_")
-            langs[tag] = []
-            # NOTE: we are excluding Zemberek here as it appears to return
-            # a broker for any language, even nonexistent ones
-            if prov not in provs and prov.name != "zemberek":
-                provs.append(prov)
-        for prov in provs:
-            for tag in langs:
-                b2 = Broker()
-                b2.set_ordering(tag, prov.name)
-                try:
-                    d = b2.request_dict(tag)
-                    if d.provider != prov:
-                        raise ValueError()
-                    langs[tag].append(prov)
-                except:
-                    pass
-        # Check availability using a single entry in ordering
-        for tag in langs:
-            for prov in langs[tag]:
-                b2 = Broker()
-                b2.set_ordering(tag, prov.name)
-                d = b2.request_dict(tag)
-                self.assertEqual((d.provider, tag), (prov, tag))
-                del d
-                del b2
-        # Place providers that don't have the language in the ordering
-        for tag in langs:
-            for prov in langs[tag]:
-                order = prov.name
-                for prov2 in provs:
-                    if prov2 not in langs[tag]:
-                        order = prov2.name + "," + order
-                b2 = Broker()
-                b2.set_ordering(tag, order)
-                d = b2.request_dict(tag)
-                self.assertEqual((d.provider, tag, order), (prov, tag, order))
-                del d
-                del b2
-
-    def test_UnicodeTag(self):
-        """Test that unicode language tags are accepted"""
-        d1 = self.broker._request_dict_data(raw_unicode("en_US"))
-        self.assertTrue(d1)
-        _e.broker_free_dict(self.broker._this, d1)
-        d1 = Dict(raw_unicode("en_US"))
-
self.assertTrue(d1) - - def test_GetSetParam(self): - try: - self.broker.get_param("pyenchant.unittest") - except AttributeError: - return - self.assertEqual(self.broker.get_param("pyenchant.unittest"), None) - self.broker.set_param("pyenchant.unittest", "testing") - self.assertEqual(self.broker.get_param("pyenchant.unittest"), "testing") - self.assertEqual(Broker().get_param("pyenchant.unittest"), None) - - -class TestDict(unittest.TestCase): - """Test cases for the proper functioning of Dict objects. - These tests assume that there is at least one working provider - with a dictionary for the "en_US" language. - """ - - def setUp(self): - self.dict = Dict("en_US") - - def tearDown(self): - del self.dict - - def test_HasENUS(self): - """Test that the en_US language is available through default broker.""" - self.assertTrue(dict_exists("en_US")) - - def test_check(self): - """Test that check() works on some common words.""" - self.assertTrue(self.dict.check("hello")) - self.assertTrue(self.dict.check("test")) - self.assertFalse(self.dict.check("helo")) - self.assertFalse(self.dict.check("testt")) - - def test_broker(self): - """Test that the dict's broker is set correctly.""" - self.assertTrue(self.dict._broker is enchant._broker) - - def test_tag(self): - """Test that the dict's tag is set correctly.""" - self.assertEqual(self.dict.tag, "en_US") - - def test_suggest(self): - """Test that suggest() gets simple suggestions right.""" - self.assertTrue(self.dict.check("hello")) - self.assertTrue("hello" in self.dict.suggest("helo")) - - def test_suggestHang1(self): - """Test whether suggest() hangs on some inputs (Bug #1404196)""" - self.assertTrue(len(self.dict.suggest("Thiis")) >= 0) - self.assertTrue(len(self.dict.suggest("Thiiis")) >= 0) - self.assertTrue(len(self.dict.suggest("Thiiiis")) >= 0) - - def test_unicode1(self): - """Test checking/suggesting for unicode strings""" - # TODO: find something that actually returns suggestions - us1 = raw_unicode(r"he\u2149lo") - self.assertTrue(type(us1) is unicode) - self.assertFalse(self.dict.check(us1)) - for s in self.dict.suggest(us1): - self.assertTrue(type(s) is unicode) - - def test_session(self): - """Test that adding words to the session works as required.""" - self.assertFalse(self.dict.check("Lozz")) - self.assertFalse(self.dict.is_added("Lozz")) - self.dict.add_to_session("Lozz") - self.assertTrue(self.dict.is_added("Lozz")) - self.assertTrue(self.dict.check("Lozz")) - self.dict.remove_from_session("Lozz") - self.assertFalse(self.dict.check("Lozz")) - self.assertFalse(self.dict.is_added("Lozz")) - self.dict.remove_from_session("hello") - self.assertFalse(self.dict.check("hello")) - self.assertTrue(self.dict.is_removed("hello")) - self.dict.add_to_session("hello") - - def test_AddRemove(self): - """Test adding/removing from default user dictionary.""" - nonsense = "kxhjsddsi" - self.assertFalse(self.dict.check(nonsense)) - self.dict.add(nonsense) - self.assertTrue(self.dict.is_added(nonsense)) - self.assertTrue(self.dict.check(nonsense)) - self.dict.remove(nonsense) - self.assertFalse(self.dict.is_added(nonsense)) - self.assertFalse(self.dict.check(nonsense)) - self.dict.remove("pineapple") - self.assertFalse(self.dict.check("pineapple")) - self.assertTrue(self.dict.is_removed("pineapple")) - self.assertFalse(self.dict.is_added("pineapple")) - self.dict.add("pineapple") - self.assertTrue(self.dict.check("pineapple")) - - def test_DefaultLang(self): - """Test behaviour of default language selection.""" - defLang = 
utils.get_default_language()
-        if defLang is None:
-            # If no default language, shouldn't work
-            self.assertRaises(Error, Dict)
-        else:
-            # If there is a default language, should use it
-            # Of course, no need for the dict to actually exist
-            try:
-                d = Dict()
-                self.assertEqual(d.tag, defLang)
-            except DictNotFoundError:
-                pass
-
-    def test_pickling(self):
-        """Test that pickling doesn't corrupt internal state."""
-        d1 = Dict("en")
-        self.assertTrue(d1.check("hello"))
-        d2 = pickle.loads(pickle.dumps(d1))
-        self.assertTrue(d1.check("hello"))
-        self.assertTrue(d2.check("hello"))
-        d1._free()
-        self.assertTrue(d2.check("hello"))
-
-
-class TestPWL(unittest.TestCase):
-    """Test cases for the proper functioning of PWLs and DictWithPWL objects.
-    These tests assume that there is at least one working provider
-    with a dictionary for the "en_US" language.
-    """
-
-    def setUp(self):
-        self._tempDir = self._mkdtemp()
-        self._fileName = "pwl.txt"
-
-    def tearDown(self):
-        import shutil
-        shutil.rmtree(self._tempDir)
-
-    def _mkdtemp(self):
-        import tempfile
-        return tempfile.mkdtemp()
-
-    def _path(self, nm=None):
-        if nm is None:
-            nm = self._fileName
-        nm = os.path.join(self._tempDir, nm)
-        if not os.path.exists(nm):
-            open(nm, 'w').close()
-        return nm
-
-    def setPWLContents(self, contents):
-        """Set the contents of the PWL file."""
-        pwlFile = open(self._path(), "w")
-        for ln in contents:
-            pwlFile.write(ln)
-            pwlFile.write("\n")
-        pwlFile.flush()
-        pwlFile.close()
-
-    def getPWLContents(self):
-        """Retrieve the contents of the PWL file."""
-        pwlFile = open(self._path(), "r")
-        contents = pwlFile.readlines()
-        pwlFile.close()
-        return [c.strip() for c in contents]
-
-    def test_check(self):
-        """Test that basic checking works for PWLs."""
-        self.setPWLContents(["Sazz", "Lozz"])
-        d = request_pwl_dict(self._path())
-        self.assertTrue(d.check("Sazz"))
-        self.assertTrue(d.check("Lozz"))
-        self.assertFalse(d.check("hello"))
-
-    def test_UnicodeFN(self):
-        """Test that unicode PWL filenames are accepted."""
-        d = request_pwl_dict(unicode(self._path()))
-        self.assertTrue(d)
-
-    def test_add(self):
-        """Test that adding words to a PWL works correctly."""
-        d = request_pwl_dict(self._path())
-        self.assertFalse(d.check("Flagen"))
-        d.add("Esquilax")
-        d.add("Esquilam")
-        self.assertTrue(d.check("Esquilax"))
-        self.assertTrue("Esquilax" in self.getPWLContents())
-        self.assertTrue(d.is_added("Esquilax"))
-
-    def test_suggestions(self):
-        """Test getting suggestions from a PWL."""
-        self.setPWLContents(["Sazz", "Lozz"])
-        d = request_pwl_dict(self._path())
-        self.assertTrue("Sazz" in d.suggest("Saz"))
-        self.assertTrue("Lozz" in d.suggest("laz"))
-        self.assertTrue("Sazz" in d.suggest("laz"))
-        d.add("Flagen")
-        self.assertTrue("Flagen" in d.suggest("Flags"))
-        self.assertFalse("sazz" in d.suggest("Flags"))
-
-    def test_DWPWL(self):
-        """Test functionality of DictWithPWL."""
-        self.setPWLContents(["Sazz", "Lozz"])
-        d = DictWithPWL("en_US", self._path(), self._path("pel.txt"))
-        self.assertTrue(d.check("Sazz"))
-        self.assertTrue(d.check("Lozz"))
-        self.assertTrue(d.check("hello"))
-        self.assertFalse(d.check("helo"))
-        self.assertFalse(d.check("Flagen"))
-        d.add("Flagen")
-        self.assertTrue(d.check("Flagen"))
-        self.assertTrue("Flagen" in self.getPWLContents())
-        self.assertTrue("Flagen" in d.suggest("Flagn"))
-        self.assertTrue("hello" in d.suggest("helo"))
-        d.remove("hello")
-        self.assertFalse(d.check("hello"))
-        self.assertTrue("hello" not in d.suggest("helo"))
-        d.remove("Lozz")
-
self.assertFalse(d.check("Lozz")) - - def test_DWPWL_empty(self): - """Test functionality of DictWithPWL using transient dicts.""" - d = DictWithPWL("en_US", None, None) - self.assertTrue(d.check("hello")) - self.assertFalse(d.check("helo")) - self.assertFalse(d.check("Flagen")) - d.add("Flagen") - self.assertTrue(d.check("Flagen")) - d.remove("hello") - self.assertFalse(d.check("hello")) - d.add("hello") - self.assertTrue(d.check("hello")) - - def test_PyPWL(self): - """Test our pure-python PWL implementation.""" - d = PyPWL() - self.assertTrue(list(d._words) == []) - d.add("hello") - d.add("there") - d.add("duck") - ws = list(d._words) - self.assertTrue(len(ws) == 3) - self.assertTrue("hello" in ws) - self.assertTrue("there" in ws) - self.assertTrue("duck" in ws) - d.remove("duck") - d.remove("notinthere") - ws = list(d._words) - self.assertTrue(len(ws) == 2) - self.assertTrue("hello" in ws) - self.assertTrue("there" in ws) - - def test_UnicodeCharsInPath(self): - """Test that unicode chars in PWL paths are accepted.""" - self._fileName = raw_unicode(r"test_\xe5\xe4\xf6_ing") - d = request_pwl_dict(self._path()) - self.assertTrue(d) - - -class TestUtils(unittest.TestCase): - """Test cases for various utility functions.""" - - def test_trim_suggestions(self): - word = "gud" - suggs = ["good", "god", "bad+"] - self.assertEquals(trim_suggestions(word, suggs, 40), ["god", "good", "bad+"]) - self.assertEquals(trim_suggestions(word, suggs, 4), ["god", "good", "bad+"]) - self.assertEquals(trim_suggestions(word, suggs, 3), ["god", "good", "bad+"]) - self.assertEquals(trim_suggestions(word, suggs, 2), ["god", "good"]) - self.assertEquals(trim_suggestions(word, suggs, 1), ["god"]) - self.assertEquals(trim_suggestions(word, suggs, 0), []) - - -class TestDocStrings(unittest.TestCase): - """Test the spelling on all docstrings we can find in this module. - - This serves two purposes - to provide a lot of test data for the - checker routines, and to make sure we don't suffer the embarrassment - of having spelling errors in a spellchecking package! 
- """ - - WORDS = ["spellchecking", "utf", "dict", "unicode", "bytestring", "bytestrings", - "str", "pyenchant", "ascii", "utils", "setup", "distutils", "pkg", - "filename", "tokenization", "tuple", "tuples", "tokenizer", - "tokenizers", "testcase", "testcases", "whitespace", "wxpython", - "spellchecker", "dialog", "urls", "wikiwords", "enchantobject", - "providerdesc", "spellcheck", "pwl", "aspell", "myspell", - "docstring", "docstrings", "stopiteration", "pwls", "pypwl", - "dictwithpwl", "skippable", "dicts", "dict's", "filenames", - "trie", "api", "ctypes", "wxspellcheckerdialog", "stateful", - "cmdlinechecker", "spellchecks", "callback", "clunkier", "iterator", - "ispell", "cor", "backends"] - - def test_docstrings(self): - """Test that all our docstrings are error-free.""" - import enchant - import enchant.utils - import enchant.pypwl - import enchant.tokenize - import enchant.tokenize.en - import enchant.checker - import enchant.checker.CmdLineChecker - try: - import enchant.checker.GtkSpellCheckerDialog - except ImportError: - pass - try: - import enchant.checker.wxSpellCheckerDialog - except ImportError: - pass - errors = [] - # Naive recursion here would blow the stack, instead we - # simulate it with our own stack - tocheck = [enchant] - checked = [] - while tocheck: - obj = tocheck.pop() - checked.append(obj) - newobjs = list(self._check_docstrings(obj, errors)) - tocheck.extend([obj for obj in newobjs if obj not in checked]) - self.assertEqual(len(errors), 0) - - def _check_docstrings(self, obj, errors): - import enchant - if hasattr(obj, "__doc__"): - skip_errors = [w for w in getattr(obj, "_DOC_ERRORS", [])] - chkr = enchant.checker.SpellChecker("en_AU", obj.__doc__, filters=[enchant.tokenize.URLFilter]) - for err in chkr: - if len(err.word) == 1: - continue - if err.word.lower() in self.WORDS: - continue - if skip_errors and skip_errors[0] == err.word: - skip_errors.pop(0) - continue - errors.append((obj, err.word, err.wordpos)) - msg = "\nDOCSTRING SPELLING ERROR: %s %s %d %s\n" % (obj, err.word, err.wordpos, chkr.suggest()) - printf([msg], file=sys.stderr) - # Find and yield all child objects that should be checked - for name in dir(obj): - if name.startswith("__"): - continue - child = getattr(obj, name) - if hasattr(child, "__file__"): - if not hasattr(globals(), "__file__"): - continue - if not child.__file__.startswith(os.path.dirname(__file__)): - continue - else: - cmod = getattr(child, "__module__", None) - if not cmod: - cclass = getattr(child, "__class__", None) - cmod = getattr(cclass, "__module__", None) - if cmod and not cmod.startswith("enchant"): - continue - yield child - - -class TestInstallEnv(unittest.TestCase): - """Run all testcases in a variety of install environments.""" - - def setUp(self): - self._tempDir = self._mkdtemp() - self._insDir = "build" - - def tearDown(self): - import shutil - shutil.rmtree(self._tempDir) - - def _mkdtemp(self): - import tempfile - return tempfile.mkdtemp() - - def install(self): - import os, sys, shutil - insdir = os.path.join(self._tempDir, self._insDir) - os.makedirs(insdir) - shutil.copytree("enchant", os.path.join(insdir, "enchant")) - - def runtests(self): - import os, sys - insdir = os.path.join(self._tempDir, self._insDir) - if str is not unicode and isinstance(insdir, unicode): - insdir = insdir.encode(sys.getfilesystemencoding()) - os.environ["PYTHONPATH"] = insdir - script = os.path.join(insdir, "enchant", "__init__.py") - res = runcmd("\"%s\" %s" % (sys.executable, script,)) - self.assertEqual(res, 0) - - def 
test_basic(self): - """Test proper functioning of TestInstallEnv suite.""" - self.install() - self.runtests() - - test_basic._DOC_ERRORS = ["TestInstallEnv"] - - def test_UnicodeInstallPath(self): - """Test installation in a path containing unicode chars.""" - self._insDir = raw_unicode(r'test_\xe5\xe4\xf6_ing') - self.install() - self.runtests() - - -class TestPy2exe(unittest.TestCase): - """Run all testcases inside a py2exe executable""" - _DOC_ERRORS = ["py", "exe"] - - def setUp(self): - self._tempDir = self._mkdtemp() - - def tearDown(self): - import shutil - shutil.rmtree(self._tempDir) - - def test_py2exe(self): - """Test pyenchant running inside a py2exe executable.""" - import os, sys, shutil - from os import path - from os.path import dirname - try: - import py2exe - except ImportError: - return - os.environ["PYTHONPATH"] = dirname(dirname(__file__)) - setup_py = path.join(dirname(__file__), "..", "tools", "setup.py2exe.py") - if not path.exists(setup_py): - return - buildCmd = '%s %s -q py2exe --dist-dir="%s"' - buildCmd = buildCmd % (sys.executable, setup_py, self._tempDir) - res = runcmd(buildCmd) - self.assertEqual(res, 0) - testCmd = self._tempDir + "\\test_pyenchant.exe" - self.assertTrue(os.path.exists(testCmd)) - res = runcmd(testCmd) - self.assertEqual(res, 0) - - test_py2exe._DOC_ERRORS = ["py", "exe"] - - def _mkdtemp(self): - import tempfile - return tempfile.mkdtemp() - - -def buildtestsuite(recurse=True): - from enchant.checker.tests import TestChecker - from enchant.tokenize.tests import TestTokenization, TestFilters - from enchant.tokenize.tests import TestTokenizeEN - suite = unittest.TestSuite() - if recurse: - suite.addTest(unittest.makeSuite(TestInstallEnv)) - suite.addTest(unittest.makeSuite(TestPy2exe)) - suite.addTest(unittest.makeSuite(TestBroker)) - suite.addTest(unittest.makeSuite(TestDict)) - suite.addTest(unittest.makeSuite(TestPWL)) - suite.addTest(unittest.makeSuite(TestUtils)) - suite.addTest(unittest.makeSuite(TestDocStrings)) - suite.addTest(unittest.makeSuite(TestChecker)) - suite.addTest(unittest.makeSuite(TestTokenization)) - suite.addTest(unittest.makeSuite(TestTokenizeEN)) - suite.addTest(unittest.makeSuite(TestFilters)) - return suite - - -def runtestsuite(recurse=False): - return unittest.TextTestRunner(verbosity=0).run(buildtestsuite(recurse=recurse)) diff --git a/PACK/enchant/tokenize/__init__.py b/PACK/enchant/tokenize/__init__.py deleted file mode 100644 index 50514da..0000000 --- a/PACK/enchant/tokenize/__init__.py +++ /dev/null @@ -1,536 +0,0 @@ -# pyenchant -# -# Copyright (C) 2004-2009, Ryan Kelly -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., 59 Temple Place - Suite 330, -# Boston, MA 02111-1307, USA. 
-#
-# In addition, as a special exception, you are
-# given permission to link the code of this program with
-# non-LGPL Spelling Provider libraries (eg: a MSFT Office
-# spell checker backend) and distribute linked combinations including
-# the two. You must obey the GNU Lesser General Public License in all
-# respects for all of the code used other than said providers. If you modify
-# this file, you may extend this exception to your version of the
-# file, but you are not obligated to do so. If you do not wish to
-# do so, delete this exception statement from your version.
-#
-"""
-
-enchant.tokenize: String tokenization functions for PyEnchant
-================================================================
-
-An important task in spellchecking is breaking up large bodies of
-text into their constituent words, each of which is then checked
-for correctness. This package provides Python functions to split
-strings into words according to the rules of a particular language.
-
-Each tokenization function accepts a string as its only positional
-argument, and returns an iterator that yields tuples of the following
-form, one for each word found::
-
-    (<word>, <pos>)
-
-The meanings of these fields should be clear: <word> is the word
-that was found and <pos> is the position within the text at which
-the word began (zero indexed, of course). The function will work
-on any string-like object that supports array-slicing; in particular
-character-array objects from the 'array' module may be used.
-
-The iterator also provides the attribute 'offset' which gives the current
-position of the tokenizer inside the string being split, and the method
-'set_offset' for manually adjusting this position. This can be used for
-example if the string's contents have changed during the tokenization
-process.
-
-To obtain an appropriate tokenization function for the language
-identified by <tag>, use the function 'get_tokenizer(tag)'::
-
-    tknzr = get_tokenizer("en_US")
-    for (word, pos) in tknzr("text to be tokenized goes here"):
-        do_something(word)
-
-This library is designed to be easily extendible by third-party
-authors. To register a tokenization function for the language
-<tag>, implement it as the function 'tokenize' within the
-module enchant.tokenize.<tag>. The 'get_tokenizer' function
-will automatically detect it. Note that the underscore must be
-used as the tag component separator in this case, in order to
-form a valid python module name. (e.g. "en_US" rather than "en-US")
-
-Currently, a tokenizer has only been implemented for the English
-language. Based on the author's limited experience, this should
-be at least partially suitable for other languages.
-
-This module also provides various implementations of "Chunkers" and
-"Filters". These classes are designed to make it easy to work with
-text in a variety of common formats, by detecting and excluding parts
-of the text that don't need to be checked.
-
-A Chunker is a class designed to break a body of text into large chunks
-of checkable content; for example the HTMLChunker class extracts the
-text content from all HTML tags but excludes the tags themselves.
-A Filter is a class designed to skip individual words during the checking
-process; for example the URLFilter class skips over any words that
-have the format of a URL.
-
-For example, to spellcheck an HTML document it is necessary to split the
-text into chunks based on HTML tags, and to filter out common word forms
-such as URLs and WikiWords.
This would look something like the following:: - - tknzr = get_tokenizer("en_US",(HTMLChunker,),(URLFilter,WikiWordFilter)) - - text = "<html><body>the url is http://example.com</body></html>" - for (word,pos) in tknzr(text): - ...check each word and react accordingly... - -""" -_DOC_ERRORS = ["pos", "pos", "tknzr", "URLFilter", "WikiWordFilter", - "tkns", "tknzr", "pos", "tkns"] - -import re -import warnings - -import enchant -from enchant.utils import next, xrange -from enchant.errors import * -
-# For backwards-compatibility. This will eventually be removed, but how -# does one mark a module-level constant as deprecated? -Error = TokenizerNotFoundError - - class tokenize: - """Base class for all tokenizer objects. - - Each tokenizer must be an iterator and provide the 'offset' - attribute as described in the documentation for this module. - - While tokenizers are in fact classes, they should be treated - like functions, and so are named using lower_case rather than - the CamelCase more traditional of class names. - """ - _DOC_ERRORS = ["CamelCase"] - - def __init__(self, text): - self._text = text - self._offset = 0 - - def __next__(self): - return self.next() - - def next(self): - raise NotImplementedError() - - def __iter__(self): - return self - - def set_offset(self, offset, replaced=False): - self._offset = offset - - def _get_offset(self): - return self._offset - - def _set_offset(self, offset): - msg = "changing a tokenizer's 'offset' attribute is deprecated;" \ - " use the 'set_offset' method" - warnings.warn(msg, category=DeprecationWarning, stacklevel=2) - self.set_offset(offset) - - offset = property(_get_offset, _set_offset) -
- -def get_tokenizer(tag=None, chunkers=None, filters=None): - """Locate an appropriate tokenizer by language tag. - - This requires importing the function 'tokenize' from an appropriate - module. Modules tried are named after the language tag, tried in the - following order: - * the entire tag (e.g. "en_AU.py") - * the base language code of the tag (e.g. "en.py") - - If the language tag is None, a default tokenizer (actually the English - one) is returned. It's unicode aware and should work OK for most - latin-derived languages. - - If a suitable function cannot be found, raises TokenizerNotFoundError. - - If given and not None, 'chunkers' and 'filters' must be lists of chunker - classes and filter classes respectively. These will be applied to the - tokenizer during creation. - """
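A minimal usage sketch of this interface (the offsets shown assume the English tokenizer's whitespace rules)::

    from enchant.tokenize import get_tokenizer

    tknzr = get_tokenizer("en_US")
    for (word, pos) in tknzr("this is some text"):
        print(word, pos)  # -> this 0, is 5, some 8, text 13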
- if tag is None: - tag = "en" - # "filters" used to be the second argument. Try to catch cases - # where it is given positionally and issue a DeprecationWarning. - if chunkers is not None and filters is None: - chunkers = list(chunkers) - if chunkers: - try: - chunkers_are_filters = issubclass(chunkers[0], Filter) - except TypeError: - pass - else: - if chunkers_are_filters: - msg = "passing 'filters' as a non-keyword argument " \ - "to get_tokenizer() is deprecated" - warnings.warn(msg, category=DeprecationWarning, stacklevel=2) - filters = chunkers - chunkers = None - # Ensure only '_' used as separator - tag = tag.replace("-", "_") - # First try the whole tag - tkFunc = _try_tokenizer(tag) - if tkFunc is None: - # Try just the base - base = tag.split("_")[0] - tkFunc = _try_tokenizer(base) - if tkFunc is None: - msg = "No tokenizer found for language '%s'" % (tag,) - raise TokenizerNotFoundError(msg) - # Given the language-specific tokenizer, we now build up the - # end result as follows: - # * chunk the text using any given chunkers in turn - # * begin with basic whitespace tokenization - # * apply each of the given filters in turn - # * apply language-specific rules - tokenizer = basic_tokenize - if chunkers is not None: - chunkers = list(chunkers) - for i in xrange(len(chunkers) - 1, -1, -1): - tokenizer = wrap_tokenizer(chunkers[i], tokenizer) - if filters is not None: - for f in filters: - tokenizer = f(tokenizer) - tokenizer = wrap_tokenizer(tokenizer, tkFunc) - return tokenizer - - -get_tokenizer._DOC_ERRORS = ["py", "py"] -
- -class empty_tokenize(tokenize): - """Tokenizer class that yields no elements.""" - _DOC_ERRORS = [] - - def __init__(self): - tokenize.__init__(self, "") - - def next(self): - raise StopIteration() - - -class unit_tokenize(tokenize): - """Tokenizer class that yields the text as a single token.""" - _DOC_ERRORS = [] - - def __init__(self, text): - tokenize.__init__(self, text) - self._done = False - - def next(self): - if self._done: - raise StopIteration() - self._done = True - return (self._text, 0) -
- -class basic_tokenize(tokenize): - """Tokenizer class that performs very basic word-finding. - - This tokenizer does the most basic thing that could work - it splits - text into words based on whitespace boundaries, and removes basic - punctuation symbols from the start and end of each word. - """ - _DOC_ERRORS = [] - - # Chars to remove from start/end of words - strip_from_start = '"' + "'`([" - strip_from_end = '"' + "'`]).!,?;:" - - def next(self): - text = self._text - offset = self._offset - while True: - if offset >= len(text): - break - # Find start of next word - while offset < len(text) and text[offset].isspace(): - offset += 1 - sPos = offset - # Find end of word - while offset < len(text) and not text[offset].isspace(): - offset += 1 - ePos = offset - self._offset = offset - # Strip chars from start/end of word - while sPos < len(text) and text[sPos] in self.strip_from_start: - sPos += 1 - while 0 < ePos and text[ePos - 1] in self.strip_from_end: - ePos -= 1 - # Return if word isn't empty - if (sPos < ePos): - return (text[sPos:ePos], sPos) - raise StopIteration() -
- -def _try_tokenizer(modName): - """Look for a tokenizer in the named module. - - Returns the function if found, None otherwise. - """ - modBase = "enchant.tokenize." - funcName = "tokenize" - modName = modBase + modName - try: - mod = __import__(modName, globals(), {}, funcName) - return getattr(mod, funcName) - except ImportError: - return None - - -def wrap_tokenizer(tk1, tk2): - """Wrap one tokenizer inside another.
- - This function takes two tokenizer functions 'tk1' and 'tk2', - and returns a new tokenizer function that passes the output - of tk1 through tk2 before yielding it to the calling code. - """ - # This logic is already implemented in the Filter class. - # We simply use tk2 as the _split() method for a filter - # around tk1. - tkW = Filter(tk1) - tkW._split = tk2 - return tkW - - -wrap_tokenizer._DOC_ERRORS = ["tk", "tk", "tk", "tk"] -
- -class Chunker(tokenize): - """Base class for text chunking functions. - - A chunker is designed to chunk text into large blocks of tokens. It - has the same interface as a tokenizer but is for a different purpose. - """ - pass - - -class Filter(object): - """Base class for token filtering functions. - - A filter is designed to wrap a tokenizer (or another filter) and do - two things: - - * skip over tokens - * split tokens into sub-tokens - - Subclasses have two basic options for customising their behaviour. The - method _skip(word) may be overridden to return True for words that - should be skipped, and False otherwise. The method _split(word) may - be overridden as a tokenization function that will be applied to further - tokenize any words that aren't skipped. - """ - - def __init__(self, tokenizer): - """Filter class constructor.""" - self._tokenizer = tokenizer - - def __call__(self, *args, **kwds): - tkn = self._tokenizer(*args, **kwds) - return self._TokenFilter(tkn, self._skip, self._split) - - def _skip(self, word): - """Filter method for identifying skippable tokens. - - If this method returns True, the given word will be skipped by - the filter. This should be overridden in subclasses to produce the - desired functionality. The default behaviour is not to skip any words. - """ - return False - - def _split(self, word): - """Filter method for sub-tokenization of tokens. - - This method must be a tokenization function that will split the - given word into sub-tokens according to the needs of the filter. - The default behaviour is not to split any words. - """ - return unit_tokenize(word) -
- class _TokenFilter(object): - """Private inner class implementing the tokenizer-wrapping logic. - - This might seem convoluted, but we're trying to create something - akin to a meta-class - when Filter(tknzr) is called it must return - a *callable* that can then be applied to a particular string to - perform the tokenization. Since we need to manage a lot of state - during tokenization, returning a class is the best option. - """ - _DOC_ERRORS = ["tknzr"] - - def __init__(self, tokenizer, skip, split): - self._skip = skip - self._split = split - self._tokenizer = tokenizer - # for managing state of sub-tokenization - self._curtok = empty_tokenize() - self._curword = "" - self._curpos = 0 - - def __iter__(self): - return self - - def __next__(self): - return self.next() - - def next(self): - # Try to get the next sub-token from word currently being split. - # If unavailable, move on to the next word and try again. - try: - (word, pos) = next(self._curtok) - return (word, pos + self._curpos) - except StopIteration: - (word, pos) = next(self._tokenizer) - while self._skip(word): - (word, pos) = next(self._tokenizer) - self._curword = word - self._curpos = pos - self._curtok = self._split(word) - return self.next() -
- # Pass on access to 'offset' to the underlying tokenizer. - def _get_offset(self): - return self._tokenizer.offset - - def _set_offset(self, offset): - msg = "changing a tokenizer's 'offset' attribute is deprecated;" \ - " use the 'set_offset' method" - warnings.warn(msg, category=DeprecationWarning, stacklevel=2) - self.set_offset(offset) - - offset = property(_get_offset, _set_offset) - - def set_offset(self, val, replaced=False): - self._tokenizer.set_offset(val, replaced=replaced) - # If we stay within the current word, also set on _curtok. - # Otherwise, throw away _curtok and set to empty iterator. - subval = val - self._curpos - if subval >= 0 and subval < len(self._curword) and not replaced: - self._curtok.set_offset(subval) - else: - self._curtok = empty_tokenize() - self._curword = "" - self._curpos = 0
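The subclassing options described in the Filter docstring can be seen in a short sketch of the _skip() override pattern (HashTagFilter is an illustrative name, not part of the module)::

    class HashTagFilter(Filter):
        """Skip over hashtag words such as '#launchday'."""

        def _skip(self, word):
            return word.startswith("#")

    tknzr = HashTagFilter(basic_tokenize)
    print(list(tknzr("ship it #launchday")))  # -> [('ship', 0), ('it', 5)]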
- -# Pre-defined chunkers and filters start here - -class URLFilter(Filter): - """Filter skipping over URLs. - This filter skips any words matching the following regular expression: - - ^[a-zA-z]+:\/\/[^\s].* - - That is, any words that are URLs. - """ - _DOC_ERRORS = ["zA"] - _pattern = re.compile(r"^[a-zA-z]+:\/\/[^\s].*") - - def _skip(self, word): - if self._pattern.match(word): - return True - return False - - -class WikiWordFilter(Filter): - """Filter skipping over WikiWords. - This filter skips any words matching the following regular expression: - - ^([A-Z]\w+[A-Z]+\w+) - - That is, any words that are WikiWords. - """ - _pattern = re.compile(r"^([A-Z]\w+[A-Z]+\w+)") - - def _skip(self, word): - if self._pattern.match(word): - return True - return False - - -class EmailFilter(Filter): - """Filter skipping over email addresses. - This filter skips any words matching the following regular expression: - - ^.+@[^\.].*\.[a-z]{2,}$ - - That is, any words that resemble email addresses. - """ - _pattern = re.compile(r"^.+@[^\.].*\.[a-z]{2,}$") - - def _skip(self, word): - if self._pattern.match(word): - return True - return False -
- -class HTMLChunker(Chunker): - """Chunker for breaking up HTML documents into chunks of checkable text. - - The operation of this chunker is very simple - anything between a "<" - and a ">" will be ignored. Later versions may improve the algorithm - slightly. - """ - - def next(self): - text = self._text - offset = self.offset - while True: - if offset >= len(text): - break - # Skip to the end of the current tag, if any. - if text[offset] == "<": - maybeTag = offset - if self._is_tag(text, offset): - while text[offset] != ">": - offset += 1 - if offset == len(text): - offset = maybeTag + 1 - break - else: - offset += 1 - else: - offset = maybeTag + 1 - sPos = offset - # Find the start of the next tag. - while offset < len(text) and text[offset] != "<": - offset += 1 - ePos = offset - self._offset = offset - # Return if chunk isn't empty - if (sPos < offset): - return (text[sPos:offset], sPos) - raise StopIteration() - - def _is_tag(self, text, offset): - if offset + 1 < len(text): - if text[offset + 1].isalpha(): - return True - if text[offset + 1] == "/": - return True - return False - -# TODO: LaTeXChunker
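Putting these pre-defined pieces together, a hedged sketch of a chunker and a filter cooperating (not from the original test suite)::

    from enchant.tokenize import get_tokenizer, HTMLChunker, URLFilter

    tknzr = get_tokenizer("en_US", chunkers=(HTMLChunker,),
                          filters=(URLFilter,))
    text = "<body>see http://example.com for <b>details</b></body>"
    print([w for (w, p) in tknzr(text)])  # -> ['see', 'for', 'details']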
diff --git a/PACK/enchant/tokenize/en.py b/PACK/enchant/tokenize/en.py deleted file mode 100644 index 8ee1204..0000000 --- a/PACK/enchant/tokenize/en.py +++ /dev/null @@ -1,172 +0,0 @@ -# pyenchant -# -# Copyright (C) 2004-2008, Ryan Kelly -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., 59 Temple Place - Suite 330, -# Boston, MA 02111-1307, USA. -# -# In addition, as a special exception, you are -# given permission to link the code of this program with -# non-LGPL Spelling Provider libraries (eg: a MSFT Office -# spell checker backend) and distribute linked combinations including -# the two. You must obey the GNU Lesser General Public License in all -# respects for all of the code used other than said providers. If you modify -# this file, you may extend this exception to your version of the -# file, but you are not obligated to do so. If you do not wish to -# do so, delete this exception statement from your version. -# -""" - - enchant.tokenize.en: Tokenizer for the English language - - This module implements a PyEnchant text tokenizer for the English - language, based on very simple rules. - -""" - -import unicodedata - -import enchant.tokenize -from enchant.utils import unicode -
- -class tokenize(enchant.tokenize.tokenize): - """Iterator splitting text into words, reporting position. - - This iterator takes a text string as input, and yields tuples - representing each distinct word found in the text. The tuples - take the form: - - (<word>,<pos>) - - Where <word> is the word string found and <pos> is the position - of the start of the word within the text. - - The optional argument <valid_chars> may be used to specify a - list of additional characters that can form part of a word. - By default, this list contains only the apostrophe ('). Note that - these characters cannot appear at the start or end of a word. - """ - - _DOC_ERRORS = ["pos", "pos"]
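A usage sketch for the behaviour just described (apostrophes join word parts but never lead or trail)::

    from enchant.tokenize.en import tokenize

    print([t for t in tokenize("don't 'quote' me")])
    # -> [("don't", 0), ('quote', 7), ('me', 14)]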
- - def __init__(self, text, valid_chars=("'",)): - self._valid_chars = valid_chars - self._text = text - self._offset = 0 - # Select proper implementation of self._consume_alpha. - # 'text' isn't necessarily a string (it could be e.g. a mutable array) - # so we can't use isinstance(text,unicode) to detect unicode. - # Instead we typetest the first character of the text. - # If there are no characters then it doesn't matter what implementation - # we use since it won't be called anyway. - try: - char1 = text[0] - except IndexError: - self._consume_alpha = self._consume_alpha_b - else: - if isinstance(char1, unicode): - self._consume_alpha = self._consume_alpha_u - else: - self._consume_alpha = self._consume_alpha_b - - def _consume_alpha_b(self, text, offset): - """Consume an alphabetic character from the given bytestring. - - Given a bytestring and the current offset, this method returns - the number of characters occupied by the next alphabetic character - in the string. Non-ASCII bytes are interpreted as utf-8 and can - result in multiple characters being consumed. - """ - assert offset < len(text) - if text[offset].isalpha(): - return 1 - elif text[offset] >= "\x80": - return self._consume_alpha_utf8(text, offset) - return 0 -
- def _consume_alpha_utf8(self, text, offset): - """Consume a sequence of utf8 bytes forming an alphabetic character.""" - incr = 2 - u = "" - while not u and incr <= 4: - try: - try: - # In the common case this will be a string - u = text[offset:offset + incr].decode("utf8") - except AttributeError: - # Looks like it was e.g. a mutable char array. - try: - s = text[offset:offset + incr].tostring() - except AttributeError: - s = "".join([c for c in text[offset:offset + incr]]) - u = s.decode("utf8") - except UnicodeDecodeError: - incr += 1 - if not u: - return 0 - if u.isalpha(): - return incr - if unicodedata.category(u)[0] == "M": - return incr - return 0 - - def _consume_alpha_u(self, text, offset): - """Consume an alphabetic character from the given unicode string. - - Given a unicode string and the current offset, this method returns - the number of characters occupied by the next alphabetic character - in the string. Trailing combining characters are consumed as a - single letter. - """ - assert offset < len(text) - incr = 0 - if text[offset].isalpha(): - incr = 1 - while offset + incr < len(text): - if unicodedata.category(text[offset + incr])[0] != "M": - break - incr += 1 - return incr -
- def next(self): - text = self._text - offset = self._offset - while offset < len(text): - # Find start of next word (must be alpha) - while offset < len(text): - incr = self._consume_alpha(text, offset) - if incr: - break - offset += 1 - curPos = offset - # Find end of word, allowing valid_chars - while offset < len(text): - incr = self._consume_alpha(text, offset) - if not incr: - if text[offset] in self._valid_chars: - incr = 1 - else: - break - offset += incr - # Return if word isn't empty - if (curPos != offset): - # Make sure word doesn't end with a valid_char - while text[offset - 1] in self._valid_chars: - offset = offset - 1 - self._offset = offset - return (text[curPos:offset], curPos) - self._offset = offset - raise StopIteration() diff --git a/PACK/enchant/tokenize/tests.py b/PACK/enchant/tokenize/tests.py deleted file mode 100644 index 569b593..0000000 --- a/PACK/enchant/tokenize/tests.py +++ /dev/null @@ -1,326 +0,0 @@ -# pyenchant -# -# Copyright (C) 2004-2008, Ryan Kelly -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., 59 Temple Place - Suite 330, -# Boston, MA 02111-1307, USA. -# -# In addition, as a special exception, you are -# given permission to link the code of this program with -# non-LGPL Spelling Provider libraries (eg: a MSFT Office -# spell checker backend) and distribute linked combinations including -# the two. You must obey the GNU Lesser General Public License in all -# respects for all of the code used other than said providers. If you modify -# this file, you may extend this exception to your version of the -# file, but you are not obligated to do so. If you do not wish to -# do so, delete this exception statement from your version. -# -""" - - enchant.tokenize.tests: unittests for enchant tokenization functions.
- -""" - -import unittest -import array - -from enchant.tokenize import * -from enchant.tokenize.en import tokenize as tokenize_en -from enchant.utils import raw_unicode, unicode, bytes - - -class TestTokenization(unittest.TestCase): - """TestCases for testing the basic tokenization functionality.""" - - def test_basic_tokenize(self): - """Simple regression test for basic white-space tokenization.""" - input = """This is a paragraph. It's not very special, but it's designed -2 show how the splitter works with many-different combos -of words. Also need to "test" the (handling) of 'quoted' words.""" - output = [ - ("This", 0), ("is", 5), ("a", 8), ("paragraph", 10), ("It's", 22), - ("not", 27), ("very", 31), ("special", 36), ("but", 45), ("it's", 49), - ("designed", 54), ("2", 63), ("show", 65), ("how", 70), ("the", 74), - ("splitter", 78), ("works", 87), ("with", 93), ("many-different", 98), - ("combos", 113), ("of", 120), ("words", 123), - ("Also", 130), ("need", 135), - ("to", 140), ("test", 144), ("the", 150), ("handling", 155), - ("of", 165), ("quoted", 169), ("words", 177) - ] - self.assertEqual(output, [i for i in basic_tokenize(input)]) - for (itmO, itmV) in zip(output, basic_tokenize(input)): - self.assertEqual(itmO, itmV) - - def test_tokenize_strip(self): - """Test special-char-stripping edge-cases in basic_tokenize.""" - input = "((' \"\" 'text' has (lots) of (special chars} >>]" - output = [("", 4), ("text", 15), ("has", 21), ("lots", 26), ("of", 32), - ("special", 36), ("chars}", 44), (">>", 51)] - self.assertEqual(output, [i for i in basic_tokenize(input)]) - for (itmO, itmV) in zip(output, basic_tokenize(input)): - self.assertEqual(itmO, itmV) - - def test_wrap_tokenizer(self): - """Test wrapping of one tokenizer with another.""" - input = "this-string will be split@according to diff'rnt rules" - from enchant.tokenize import en - tknzr = wrap_tokenizer(basic_tokenize, en.tokenize) - tknzr = tknzr(input) - self.assertEqual(tknzr._tokenizer.__class__, basic_tokenize) - self.assertEqual(tknzr._tokenizer.offset, 0) - for (n, (word, pos)) in enumerate(tknzr): - if n == 0: - self.assertEqual(pos, 0) - self.assertEqual(word, "this") - if n == 1: - self.assertEqual(pos, 5) - self.assertEqual(word, "string") - if n == 2: - self.assertEqual(pos, 12) - self.assertEqual(word, "will") - # Test setting offset to a previous token - tknzr.set_offset(5) - self.assertEqual(tknzr.offset, 5) - self.assertEqual(tknzr._tokenizer.offset, 5) - self.assertEqual(tknzr._curtok.__class__, empty_tokenize) - if n == 3: - self.assertEqual(word, "string") - self.assertEqual(pos, 5) - if n == 4: - self.assertEqual(pos, 12) - self.assertEqual(word, "will") - if n == 5: - self.assertEqual(pos, 17) - self.assertEqual(word, "be") - # Test setting offset past the current token - tknzr.set_offset(20) - self.assertEqual(tknzr.offset, 20) - self.assertEqual(tknzr._tokenizer.offset, 20) - self.assertEqual(tknzr._curtok.__class__, empty_tokenize) - if n == 6: - self.assertEqual(pos, 20) - self.assertEqual(word, "split") - if n == 7: - self.assertEqual(pos, 26) - self.assertEqual(word, "according") - # Test setting offset to middle of current token - tknzr.set_offset(23) - self.assertEqual(tknzr.offset, 23) - self.assertEqual(tknzr._tokenizer.offset, 23) - self.assertEqual(tknzr._curtok.offset, 3) - if n == 8: - self.assertEqual(pos, 23) - self.assertEqual(word, "it") - # OK, I'm pretty happy with the behaviour, no need to - # continue testing the rest of the string - - -class TestFilters(unittest.TestCase): - 
"""TestCases for the various Filter subclasses.""" - - text = """this text with http://url.com and SomeLinksLike - ftp://my.site.com.au/some/file AndOthers not:/quite.a.url - with-an@aemail.address as well""" - - def setUp(self): - pass - - def test_URLFilter(self): - """Test filtering of URLs""" - tkns = get_tokenizer("en_US", filters=(URLFilter,))(self.text) - out = [t for t in tkns] - exp = [("this", 0), ("text", 5), ("with", 10), ("and", 30), - ("SomeLinksLike", 34), ("AndOthers", 93), ("not", 103), ("quite", 108), - ("a", 114), ("url", 116), ("with", 134), ("an", 139), ("aemail", 142), - ("address", 149), ("as", 157), ("well", 160)] - self.assertEqual(out, exp) - - def test_WikiWordFilter(self): - """Test filtering of WikiWords""" - tkns = get_tokenizer("en_US", filters=(WikiWordFilter,))(self.text) - out = [t for t in tkns] - exp = [("this", 0), ("text", 5), ("with", 10), ("http", 15), ("url", 22), ("com", 26), - ("and", 30), ("ftp", 62), ("my", 68), ("site", 71), ("com", 76), ("au", 80), - ("some", 83), ("file", 88), ("not", 103), ("quite", 108), - ("a", 114), ("url", 116), ("with", 134), ("an", 139), ("aemail", 142), - ("address", 149), ("as", 157), ("well", 160)] - self.assertEqual(out, exp) - - def test_EmailFilter(self): - """Test filtering of email addresses""" - tkns = get_tokenizer("en_US", filters=(EmailFilter,))(self.text) - out = [t for t in tkns] - exp = [("this", 0), ("text", 5), ("with", 10), ("http", 15), ("url", 22), ("com", 26), - ("and", 30), ("SomeLinksLike", 34), - ("ftp", 62), ("my", 68), ("site", 71), ("com", 76), ("au", 80), - ("some", 83), ("file", 88), ("AndOthers", 93), ("not", 103), ("quite", 108), - ("a", 114), ("url", 116), - ("as", 157), ("well", 160)] - self.assertEqual(out, exp) - - def test_CombinedFilter(self): - """Test several filters combined""" - tkns = get_tokenizer("en_US", filters=(URLFilter, WikiWordFilter, EmailFilter))(self.text) - out = [t for t in tkns] - exp = [("this", 0), ("text", 5), ("with", 10), - ("and", 30), ("not", 103), ("quite", 108), - ("a", 114), ("url", 116), - ("as", 157), ("well", 160)] - self.assertEqual(out, exp) - - -class TestChunkers(unittest.TestCase): - """TestCases for the various Chunker subclasses.""" - - def test_HTMLChunker(self): - """Test filtering of URLs""" - text = """hellomy titlethis is a - simple HTML document for

testing purposes

. - It < contains > various <-- special characters. - """ - tkns = get_tokenizer("en_US", chunkers=(HTMLChunker,))(text) - out = [t for t in tkns] - exp = [("hello", 0), ("my", 24), ("title", 27), ("this", 53), ("is", 58), - ("a", 61), ("simple", 82), ("HTML", 93), ("document", 98), ("for", 107), - ("test", 115), ("ing", 122), ("purposes", 130), ("It", 160), - ("contains", 165), ("various", 176), ("special", 188), - ("characters", 196)] - self.assertEqual(out, exp) - for (word, pos) in out: - self.assertEqual(text[pos:pos + len(word)], word) - - -class TestTokenizeEN(unittest.TestCase): - """TestCases for checking behaviour of English tokenization.""" - - def test_tokenize_en(self): - """Simple regression test for English tokenization.""" - input = """This is a paragraph. It's not very special, but it's designed -2 show how the splitter works with many-different combos -of words. Also need to "test" the handling of 'quoted' words.""" - output = [ - ("This", 0), ("is", 5), ("a", 8), ("paragraph", 10), ("It's", 22), - ("not", 27), ("very", 31), ("special", 36), ("but", 45), ("it's", 49), - ("designed", 54), ("show", 65), ("how", 70), ("the", 74), - ("splitter", 78), ("works", 87), ("with", 93), ("many", 98), - ("different", 103), ("combos", 113), ("of", 120), ("words", 123), - ("Also", 130), ("need", 135), - ("to", 140), ("test", 144), ("the", 150), ("handling", 154), - ("of", 163), ("quoted", 167), ("words", 175) - ] - for (itmO, itmV) in zip(output, tokenize_en(input)): - self.assertEqual(itmO, itmV) - - def test_unicodeBasic(self): - """Test tokenization of a basic unicode string.""" - input = raw_unicode( - r"Ik ben ge\u00EFnteresseerd in de co\u00F6rdinatie van mijn knie\u00EBn, maar kan niet \u00E9\u00E9n \u00E0 twee enqu\u00EAtes vinden die recht doet aan mijn carri\u00E8re op Cura\u00E7ao") - output = input.split(" ") - output[8] = output[8][0:-1] - for (itmO, itmV) in zip(output, tokenize_en(input)): - self.assertEqual(itmO, itmV[0]) - self.assertTrue(input[itmV[1]:].startswith(itmO)) - - def test_unicodeCombining(self): - """Test tokenization with unicode combining symbols.""" - input = raw_unicode( - r"Ik ben gei\u0308nteresseerd in de co\u00F6rdinatie van mijn knie\u00EBn, maar kan niet e\u0301e\u0301n \u00E0 twee enqu\u00EAtes vinden die recht doet aan mijn carri\u00E8re op Cura\u00E7ao") - output = input.split(" ") - output[8] = output[8][0:-1] - for (itmO, itmV) in zip(output, tokenize_en(input)): - self.assertEqual(itmO, itmV[0]) - self.assertTrue(input[itmV[1]:].startswith(itmO)) - - def test_utf8_bytes(self): - """Test tokenization of UTF8-encoded bytes (bug #2500184).""" - # Python3 doesn't support bytestrings, don't run this test - if str is unicode: - return - input = "A r\xc3\xa9sum\xc3\xa9, also spelled resum\xc3\xa9 or resume" - output = input.split(" ") - output[1] = output[1][0:-1] - for (itmO, itmV) in zip(output, tokenize_en(input)): - self.assertEqual(itmO, itmV[0]) - self.assertTrue(input[itmV[1]:].startswith(itmO)) - - def test_utf8_bytes_at_end(self): - """Test tokenization of UTF8-encoded bytes at end of word.""" - # Python3 doesn't support bytestrings, don't run this test - if str is unicode: - return - input = "A r\xc3\xa9sum\xc3\xa9, also spelled resum\xc3\xa9 or resume" - output = input.split(" ") - output[1] = output[1][0:-1] - for (itmO, itmV) in zip(output, tokenize_en(input)): - self.assertEqual(itmO, itmV[0]) - - def test_utf8_bytes_in_an_array(self): - """Test tokenization of UTF8-encoded bytes stored in an array.""" - # Python3 doesn't support 
bytestrings, don't run this test - if str is unicode: - return - input = "A r\xc3\xa9sum\xc3\xa9, also spelled resum\xc3\xa9 or resume" - output = input.split(" ") - output[1] = output[1][0:-1] - input = array.array('c', input) - output = [array.array('c', w) for w in output] - for (itmO, itmV) in zip(output, tokenize_en(array.array('c', input))): - self.assertEqual(itmO, itmV[0]) - self.assertEqual(input[itmV[1]:itmV[1] + len(itmV[0])], itmO) - - def test_bug1591450(self): - """Check for tokenization regressions identified in bug #1591450.""" - input = """Testing markup and {y:i}so-forth...leading dots and trail--- well, you get-the-point. Also check numbers: 999 1,000 12:00 .45. Done?""" - output = [ - ("Testing", 0), ("i", 9), ("markup", 11), ("i", 19), ("and", 22), - ("y", 27), ("i", 29), ("so", 31), ("forth", 34), ("leading", 42), - ("dots", 50), ("and", 55), ("trail", 59), ("well", 68), - ("you", 74), ("get", 78), ("the", 82), ("point", 86), - ("Also", 93), ("check", 98), ("numbers", 104), ("Done", 134), - ] - for (itmO, itmV) in zip(output, tokenize_en(input)): - self.assertEqual(itmO, itmV) - - def test_bug2785373(self): - """Testcases for bug #2785373""" - input = "So, one dey when I wes 17, I left." - for _ in tokenize_en(input): - pass - input = raw_unicode("So, one dey when I wes 17, I left.") - for _ in tokenize_en(input): - pass - - def test_finnish_text(self): - """Test tokenizing some Finnish text. - - This really should work since there are no special rules to apply, - just lots of non-ascii characters. - """ - inputT = raw_unicode( - 'T\\xe4m\\xe4 on kappale. Eip\\xe4 ole kovin 2 nen, mutta tarkoitus on n\\xe4ytt\\xe4\\xe4 miten sanastaja \\ntoimii useiden-erilaisten sanarypp\\xe4iden kimpussa.\\nPit\\xe4\\xe4p\\xe4 viel\\xe4 \'tarkistaa\' sanat jotka "lainausmerkeiss\\xe4". Heittomerkki ja vaa\'an.\\nUlkomaisia sanoja s\\xfcss, spa\\xdf.') - outputT = [ - (raw_unicode('T\\xe4m\\xe4'), 0), (raw_unicode('on'), 5), (raw_unicode('kappale'), 8), - (raw_unicode('Eip\\xe4'), 17), (raw_unicode('ole'), 22), (raw_unicode('kovin'), 26), - (raw_unicode('nen'), 34), (raw_unicode('mutta'), 39), (raw_unicode('tarkoitus'), 45), - (raw_unicode('on'), 55), (raw_unicode('n\\xe4ytt\\xe4\\xe4'), 58), (raw_unicode('miten'), 66), - (raw_unicode('sanastaja'), 72), (raw_unicode('toimii'), 83), (raw_unicode('useiden'), 90), - (raw_unicode('erilaisten'), 98), (raw_unicode('sanarypp\\xe4iden'), 109), (raw_unicode('kimpussa'), 123), - (raw_unicode('Pit\\xe4\\xe4p\\xe4'), 133), (raw_unicode('viel\\xe4'), 141), (raw_unicode('tarkistaa'), 148), - (raw_unicode('sanat'), 159), (raw_unicode('jotka'), 165), (raw_unicode('lainausmerkeiss\\xe4'), 172), - (raw_unicode('Heittomerkki'), 191), (raw_unicode('ja'), 204), (raw_unicode("vaa'an"), 207), - (raw_unicode('Ulkomaisia'), 215), (raw_unicode('sanoja'), 226), (raw_unicode('s\\xfcss'), 233), - (raw_unicode('spa\\xdf'), 239), ] - for (itmO, itmV) in zip(outputT, tokenize_en(inputT)): - self.assertEqual(itmO, itmV) diff --git a/PACK/enchant/utils.py b/PACK/enchant/utils.py deleted file mode 100644 index e919fd7..0000000 --- a/PACK/enchant/utils.py +++ /dev/null @@ -1,354 +0,0 @@ -# pyenchant -# -# Copyright (C) 2004-2008 Ryan Kelly -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. 
-# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., 59 Temple Place - Suite 330, -# Boston, MA 02111-1307, USA. -# -# In addition, as a special exception, you are -# given permission to link the code of this program with -# non-LGPL Spelling Provider libraries (eg: a MSFT Office -# spell checker backend) and distribute linked combinations including -# the two. You must obey the GNU Lesser General Public License in all -# respects for all of the code used other than said providers. If you modify -# this file, you may extend this exception to your version of the -# file, but you are not obligated to do so. If you do not wish to -# do so, delete this exception statement from your version. -# -""" - -enchant.utils: Misc utilities for the enchant package -======================================================== - -This module provides miscellaneous utilities for use with the -enchant spellchecking package. Currently available functionality -includes: - - * string/unicode compatibility wrappers - * functions for dealing with locale/language settings - * ability to list supporting data files (win32 only) - * functions for bundling supporting data files from a build - -""" - -import os -import sys -import codecs - -from enchant.errors import * -
-# Attempt to access local language information -try: - import locale -except ImportError: - locale = None - -# -# Unicode/Bytes compatibility wrappers. -# -# These allow us to support both Python 2.x and Python 3.x from -# the same codebase. -# -# We provide explicit type objects "bytes" and "unicode" that can be -# used to construct instances of the appropriate type. The class -# "EnchantStr" derives from the default "str" type and implements the -# necessary logic for encoding/decoding as strings are passed into -# the underlying C library (where they must always be utf-8 encoded -# byte strings). -# - -try: - unicode = unicode -except NameError: - str = str - unicode = str - bytes = bytes - basestring = (str, bytes) -else: - str = str - unicode = unicode - bytes = str - basestring = basestring -
- -def raw_unicode(raw): - """Make a unicode string from a raw string. - - This function takes a string containing unicode escape characters, - and returns the corresponding unicode string. Useful for writing - unicode string literals in your python source while being upwards- - compatible with Python 3. For example, instead of doing this: - - s = u"hello\u2149" # syntax error in Python 3 - - Or this: - - s = "hello\u2149" # not what you want in Python 2.x - - You can do this: - - s = raw_unicode(r"hello\u2149") # works everywhere! - - """ - return raw.encode("utf8").decode("unicode-escape") - - -def raw_bytes(raw): - """Make a bytes object out of a raw string. - - This is analogous to raw_unicode, but processes byte escape characters - to produce a bytes object. - """ - return codecs.escape_decode(raw)[0] -
- -class EnchantStr(str): - """String subclass for interfacing with enchant C library. - - This class encapsulates the logic for interfacing between python native - string/unicode objects and the underlying enchant library, which expects - all strings to be UTF-8 character arrays. It is a subclass of the - default string class 'str' - on Python 2.x that makes it an ascii string, - on Python 3.x it is a unicode object. - - Initialise it with a string or unicode object, and use the encode() method - to obtain an object suitable for passing to the underlying C library. - When strings are read back into python, use decode(s) to translate them - back into the appropriate python-level string type. - - This allows us to follow the common Python 2.x idiom of returning - unicode when unicode is passed in, and byte strings otherwise. It also - lets the interface be upwards-compatible with Python 3, in which string - objects are unicode by default. - """ - - def __new__(cls, value): - """EnchantStr data constructor. - - This method records whether the initial string was unicode, then - simply passes it along to the default string constructor. - """ - if type(value) is unicode: - was_unicode = True - if str is not unicode: - value = value.encode("utf-8") - else: - was_unicode = False - if str is not bytes: - raise Error("Don't pass bytestrings to pyenchant") - self = str.__new__(cls, value) - self._was_unicode = was_unicode - return self - - def encode(self): - """Encode this string into a form usable by the enchant C library.""" - if str is unicode: - return str.encode(self, "utf-8") - else: - return self - - def decode(self, value): - """Decode a string returned by the enchant C library.""" - if self._was_unicode: - if str is unicode: - # On some python3 versions, ctypes converts c_char_p - # to str() rather than bytes() - if isinstance(value, str): - value = value.encode() - return value.decode("utf-8") - else: - return value.decode("utf-8") - else: - return value
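A hedged round-trip sketch of the encode/decode contract just described (unicode in, unicode out; the C-library side sees UTF-8)::

    s = EnchantStr(u"caf\u00e9")
    raw = s.encode()      # UTF-8 data for the underlying C library
    print(s.decode(raw))  # -> u"caf\u00e9", the caller's type restored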
- -def printf(values, sep=" ", end="\n", file=None): - """Compatibility wrapper for the print statement/function. - - This function is a simple Python2/Python3 compatibility wrapper - for printing to stdout. - """ - if file is None: - file = sys.stdout - file.write(sep.join(map(str, values))) - file.write(end) - - -try: - next = next -except NameError: - def next(iter): - """Compatibility wrapper for advancing an iterator.""" - return iter.next() - -try: - xrange = xrange -except NameError: - xrange = range - - -# -# Other useful functions. -# -
- -def levenshtein(s1, s2): - """Calculate the Levenshtein distance between two strings. - - This is straight from Wikipedia. - """ - if len(s1) < len(s2): - return levenshtein(s2, s1) - if not s1: - return len(s2) - - previous_row = xrange(len(s2) + 1) - for i, c1 in enumerate(s1): - current_row = [i + 1] - for j, c2 in enumerate(s2): - insertions = previous_row[j + 1] + 1 - deletions = current_row[j] + 1 - substitutions = previous_row[j] + (c1 != c2) - current_row.append(min(insertions, deletions, substitutions)) - previous_row = current_row - - return previous_row[-1] -
- -def trim_suggestions(word, suggs, maxlen, calcdist=None): - """Trim a list of suggestions to a maximum length. - - If the list of suggested words is too long, you can use this function - to trim it down to a maximum length. It tries to keep the "best" - suggestions based on similarity to the original word. - - If the optional "calcdist" argument is provided, it must be a callable - taking two words and returning the distance between them. It will be - used to determine which words to retain in the list. The default is - a simple Levenshtein distance. - """ - if calcdist is None: - calcdist = levenshtein - decorated = [(calcdist(word, s), s) for s in suggs] - decorated.sort() - return [s for (l, s) in decorated[:maxlen]]
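A quick check of the two helpers defined above (values follow from the definitions as written)::

    print(levenshtein("enchant", "enchnt"))  # one deletion -> 1
    suggs = ["penchant", "enchant", "incant"]
    print(trim_suggestions("enchnt", suggs, 2))
    # -> ['enchant', 'incant'], the two closest by edit distance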
- """ - if calcdist is None: - calcdist = levenshtein - decorated = [(calcdist(word, s), s) for s in suggs] - decorated.sort() - return [s for (l, s) in decorated[:maxlen]] - - -def get_default_language(default=None): - """Determine the user's default language, if possible. - - This function uses the 'locale' module to try to determine - the user's preferred language. The return value is as - follows: - - * if a locale is available for the LC_MESSAGES category, - that language is used - * if a default locale is available, that language is used - * if the keyword argument is given, it is used - * if nothing else works, None is returned - - Note that determining the user's language is in general only - possible if they have set the necessary environment variables - on their system. - """ - try: - import locale - tag = locale.getlocale()[0] - if tag is None: - tag = locale.getdefaultlocale()[0] - if tag is None: - raise Error("No default language available") - return tag - except Exception: - pass - return default - - -get_default_language._DOC_ERRORS = ["LC"] - - -def get_resource_filename(resname): - """Get the absolute path to the named resource file. - - This serves widely the same purpose as pkg_resources.resource_filename(), - but tries to avoid loading pkg_resources unless we're actually in - an egg. - """ - path = os.path.dirname(os.path.abspath(__file__)) - path = os.path.join(path, resname) - if os.path.exists(path): - return path - if hasattr(sys, "frozen"): - exe_path = unicode(sys.executable, sys.getfilesystemencoding()) - exe_dir = os.path.dirname(exe_path) - path = os.path.join(exe_dir, resname) - if os.path.exists(path): - return path - else: - import pkg_resources - try: - path = pkg_resources.resource_filename("enchant", resname) - except KeyError: - pass - else: - path = os.path.abspath(path) - if os.path.exists(path): - return path - raise Error("Could not locate resource '%s'" % (resname,)) - - -def win32_data_files(): - """Get list of supporting data files, for use with setup.py - - This function returns a list of the supporting data files available - to the running version of PyEnchant. This is in the format expected - by the data_files argument of the distutils setup function. It's - very useful, for example, for including the data files in an executable - produced by py2exe. 
- - Only really tested on the win32 platform (it's the only platform for - which we ship our own supporting data files) - """ - # Include the main enchant DLL - try: - libEnchant = get_resource_filename("libenchant.dll") - except Error: - libEnchant = get_resource_filename("libenchant-1.dll") - mainDir = os.path.dirname(libEnchant) - dataFiles = [('', [libEnchant])] - # And some specific supporting DLLs - for dll in os.listdir(mainDir): - if not dll.endswith(".dll"): - continue - for prefix in ("iconv", "intl", "libglib", "libgmodule"): - if dll.startswith(prefix): - break - else: - continue - dataFiles[0][1].append(os.path.join(mainDir, dll)) - # And anything found in the supporting data directories - dataDirs = ("share/enchant/myspell", "share/enchant/ispell", "lib/enchant") - for dataDir in dataDirs: - files = [] - fullDir = os.path.join(mainDir, os.path.normpath(dataDir)) - for fn in os.listdir(fullDir): - fullFn = os.path.join(fullDir, fn) - if os.path.isfile(fullFn): - files.append(fullFn) - dataFiles.append((dataDir, files)) - return dataFiles - - -win32_data_files._DOC_ERRORS = ["py", "py", "exe"] diff --git a/PACK/maskgen.py b/PACK/maskgen.py old mode 100644 new mode 100755 index 9c941f8..bcf517d --- a/PACK/maskgen.py +++ b/PACK/maskgen.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # MaskGen - Generate Password Masks # # This tool is part of PACK (Password Analysis and Cracking Kit) @@ -36,6 +36,11 @@ class MaskGen: self.minoccurrence = None self.maxoccurrence = None + self.customcharset1len = None + self.customcharset2len = None + self.customcharset3len = None + self.customcharset4len = None + # PPS (Passwords per Second) Cracking Speed self.pps = 1000000000 self.showmasks = False @@ -57,42 +62,60 @@ class MaskGen: count *= 33 elif char == "a": count *= 95 + elif char == "b": + count *= 256 + elif char == "h": + count *= 16 + elif char == "H": + count *= 16 + elif char == "1" and self.customcharset1len: + count *= self.customcharset1len + elif char == "2" and self.customcharset2len: + count *= self.customcharset2len + elif char == "3" and self.customcharset3len: + count *= self.customcharset3len + elif char == "4" and self.customcharset4len: + count *= self.customcharset4len else: - print - "[!] Error, unknown mask ?%s in a mask %s" % (char, mask) + print("[!] Error, unknown mask ?%s in a mask %s" % + (char, mask)) return count def loadmasks(self, filename): """ Load masks and apply filters. 
""" - maskReader = csv.reader(open(args[0], 'r'), delimiter=',', quotechar='"') + maskReader = csv.reader( + open(args[0], 'r'), delimiter=',', quotechar='"') for (mask, occurrence) in maskReader: - if mask == "": continue + if mask == "": + continue mask_occurrence = int(occurrence) - mask_length = len(mask) / 2 + mask_length = len(mask)/2 mask_complexity = self.getcomplexity(mask) - mask_time = mask_complexity / self.pps + mask_time = mask_complexity/self.pps self.total_occurrence += mask_occurrence # Apply filters based on occurrence, length, complexity and time if (self.minoccurrence == None or mask_occurrence >= self.minoccurrence) and \ - (self.maxoccurrence == None or mask_occurrence <= self.maxoccurrence) and \ - (self.mincomplexity == None or mask_complexity <= self.mincomplexity) and \ - (self.maxcomplexity == None or mask_complexity <= self.maxcomplexity) and \ - (self.mintime == None or mask_time <= self.mintime) and \ - (self.maxtime == None or mask_time <= self.maxtime) and \ - (self.maxlength == None or mask_length <= self.maxlength) and \ - (self.minlength == None or mask_length >= self.minlength): + (self.maxoccurrence == None or mask_occurrence <= self.maxoccurrence) and \ + (self.mincomplexity == None or mask_complexity >= self.mincomplexity) and \ + (self.maxcomplexity == None or mask_complexity <= self.maxcomplexity) and \ + (self.mintime == None or mask_time >= self.mintime) and \ + (self.maxtime == None or mask_time <= self.maxtime) and \ + (self.maxlength == None or mask_length <= self.maxlength) and \ + (self.minlength == None or mask_length >= self.minlength): + self.masks[mask] = dict() self.masks[mask]['length'] = mask_length self.masks[mask]['occurrence'] = mask_occurrence self.masks[mask]['complexity'] = 1 - mask_complexity self.masks[mask]['time'] = mask_time - self.masks[mask]['optindex'] = 1 - mask_complexity / mask_occurrence + self.masks[mask]['optindex'] = 1 - \ + mask_complexity/mask_occurrence def generate_masks(self, sorting_mode): """ Generate optimal password masks sorted by occurrence, complexity or optindex """ @@ -104,16 +127,15 @@ class MaskGen: # Group by length 1,2,3,4,5,6,7,8,9,10.... # Group by occurrence 10%, 20%, 30%, 40%, 50%.... - if self.showmasks: print - "[L:] Mask: [ Occ: ] [ Time: ]" - for mask in sorted(self.masks.keys(), key=lambda mask: self.masks[mask][sorting_mode], reverse=True): + if self.showmasks: + print("[L:] Mask: [ Occ: ] [ Time: ]") + for mask in sorted(list(self.masks.keys()), key=lambda mask: self.masks[mask][sorting_mode], reverse=True): if self.showmasks: - time_human = ">1 year" if self.masks[mask]['time'] > 60 * 60 * 24 * 365 else str( + time_human = ">1 year" if self.masks[mask]['time'] > 60*60*24*365 else str( datetime.timedelta(seconds=self.masks[mask]['time'])) - print - "[{:>2}] {:<30} [{:<7}] [{:>8}] ".format(self.masks[mask]['length'], mask, - self.masks[mask]['occurrence'], time_human) + print("[{:>2}] {:<30} [{:<7}] [{:>8}] ".format( + self.masks[mask]['length'], mask, self.masks[mask]['occurrence'], time_human)) if self.output_file: self.output_file.write("%s\n" % mask) @@ -123,20 +145,16 @@ class MaskGen: sample_count += 1 if self.target_time and sample_time > self.target_time: - print - "[!] Target time exceeded." + print("[!] 
Target time exceeded.") break - print - "[*] Finished generating masks:" - print - " Masks generated: %s" % sample_count - print - " Masks coverage: %d%% (%d/%d)" % ( - sample_occurrence * 100 / self.total_occurrence, sample_occurrence, self.total_occurrence) - time_human = ">1 year" if sample_time > 60 * 60 * 24 * 365 else str(datetime.timedelta(seconds=sample_time)) - print - " Masks runtime: %s" % time_human + print("[*] Finished generating masks:") + print(" Masks generated: %s" % sample_count) + print(" Masks coverage: %d%% (%d/%d)" % (sample_occurrence*100 / + self.total_occurrence, sample_occurrence, self.total_occurrence)) + time_human = ">1 year" if sample_time > 60*60*24 * \ + 365 else str(datetime.timedelta(seconds=sample_time)) + print(" Masks runtime: %s" % time_human) def getmaskscoverage(self, checkmasks): @@ -145,8 +163,8 @@ class MaskGen: total_complexity = 0 - if self.showmasks: print - "[L:] Mask: [ Occ: ] [ Time: ]" + if self.showmasks: + print("[L:] Mask: [ Occ: ] [ Time: ]") for mask in checkmasks: mask = mask.strip() mask_complexity = self.getcomplexity(mask) @@ -156,11 +174,10 @@ class MaskGen: if mask in self.masks: if self.showmasks: - time_human = ">1 year" if self.masks[mask]['time'] > 60 * 60 * 24 * 365 else str( + time_human = ">1 year" if self.masks[mask]['time'] > 60*60*24*365 else str( datetime.timedelta(seconds=self.masks[mask]['time'])) - print - "[{:>2}] {:<30} [{:<7}] [{:>8}] ".format(self.masks[mask]['length'], mask, - self.masks[mask]['occurrence'], time_human) + print("[{:>2}] {:<30} [{:<7}] [{:>8}] ".format( + self.masks[mask]['length'], mask, self.masks[mask]['occurrence'], time_human)) if self.output_file: self.output_file.write("%s\n" % mask) @@ -168,23 +185,19 @@ class MaskGen: sample_occurrence += self.masks[mask]['occurrence'] sample_count += 1 - if self.target_time and total_complexity / self.pps > self.target_time: - print - "[!] Target time exceeded." + if self.target_time and total_complexity/self.pps > self.target_time: + print("[!] Target time exceeded.") break # TODO: Something wrong here, complexity and time doesn't match with estimated from policygen - total_time = total_complexity / self.pps - time_human = ">1 year" if total_time > 60 * 60 * 24 * 365 else str(datetime.timedelta(seconds=total_time)) - print - "[*] Finished matching masks:" - print - " Masks matched: %s" % sample_count - print - " Masks coverage: %d%% (%d/%d)" % ( - sample_occurrence * 100 / self.total_occurrence, sample_occurrence, self.total_occurrence) - print - " Masks runtime: %s" % time_human + total_time = total_complexity/self.pps + time_human = ">1 year" if total_time > 60*60*24 * \ + 365 else str(datetime.timedelta(seconds=total_time)) + print("[*] Finished matching masks:") + print(" Masks matched: %s" % sample_count) + print(" Masks coverage: %d%% (%d/%d)" % (sample_occurrence*100 / + self.total_occurrence, sample_occurrence, self.total_occurrence)) + print(" Masks runtime: %s" % time_human) if __name__ == "__main__": @@ -199,85 +212,127 @@ if __name__ == "__main__": header += " |_| iphelix@thesprawl.org\n" header += "\n" - parser = OptionParser("%prog pass0.masks [pass1.masks ...] [options]", version="%prog " + VERSION) + parser = OptionParser( + "%prog pass0.masks [pass1.masks ...] 
[options]", version="%prog "+VERSION) - parser.add_option("-t", "--targettime", dest="target_time", type="int", metavar="86400", - help="Target time of all masks (seconds)") - parser.add_option("-o", "--outputmasks", dest="output_masks", metavar="masks.hcmask", help="Save masks to a file") + parser.add_option("-t", "--targettime", dest="target_time", type="int", + metavar="86400", help="Target time of all masks (seconds)") + parser.add_option("-o", "--outputmasks", dest="output_masks", + metavar="masks.hcmask", help="Save masks to a file") filters = OptionGroup(parser, "Individual Mask Filter Options") - filters.add_option("--minlength", dest="minlength", type="int", metavar="8", help="Minimum password length") - filters.add_option("--maxlength", dest="maxlength", type="int", metavar="8", help="Maximum password length") - filters.add_option("--mintime", dest="mintime", type="int", metavar="3600", help="Minimum mask runtime (seconds)") - filters.add_option("--maxtime", dest="maxtime", type="int", metavar="3600", help="Maximum mask runtime (seconds)") - filters.add_option("--mincomplexity", dest="mincomplexity", type="int", metavar="1", help="Minimum complexity") - filters.add_option("--maxcomplexity", dest="maxcomplexity", type="int", metavar="100", help="Maximum complexity") - filters.add_option("--minoccurrence", dest="minoccurrence", type="int", metavar="1", help="Minimum occurrence") - filters.add_option("--maxoccurrence", dest="maxoccurrence", type="int", metavar="100", help="Maximum occurrence") + filters.add_option("--minlength", dest="minlength", + type="int", metavar="8", help="Minimum password length") + filters.add_option("--maxlength", dest="maxlength", + type="int", metavar="8", help="Maximum password length") + filters.add_option("--mintime", dest="mintime", type="int", + metavar="3600", help="Minimum mask runtime (seconds)") + filters.add_option("--maxtime", dest="maxtime", type="int", + metavar="3600", help="Maximum mask runtime (seconds)") + filters.add_option("--mincomplexity", dest="mincomplexity", + type="int", metavar="1", help="Minimum complexity") + filters.add_option("--maxcomplexity", dest="maxcomplexity", + type="int", metavar="100", help="Maximum complexity") + filters.add_option("--minoccurrence", dest="minoccurrence", + type="int", metavar="1", help="Minimum occurrence") + filters.add_option("--maxoccurrence", dest="maxoccurrence", + type="int", metavar="100", help="Maximum occurrence") parser.add_option_group(filters) sorting = OptionGroup(parser, "Mask Sorting Options") - sorting.add_option("--optindex", action="store_true", dest="optindex", help="sort by mask optindex (default)", - default=False) - sorting.add_option("--occurrence", action="store_true", dest="occurrence", help="sort by mask occurrence", - default=False) - sorting.add_option("--complexity", action="store_true", dest="complexity", help="sort by mask complexity", - default=False) + sorting.add_option("--optindex", action="store_true", dest="optindex", + help="sort by mask optindex (default)", default=False) + sorting.add_option("--occurrence", action="store_true", dest="occurrence", + help="sort by mask occurrence", default=False) + sorting.add_option("--complexity", action="store_true", dest="complexity", + help="sort by mask complexity", default=False) parser.add_option_group(sorting) coverage = OptionGroup(parser, "Check mask coverage") - coverage.add_option("--checkmasks", dest="checkmasks", help="check mask coverage", - metavar="?u?l?l?l?l?l?d,?l?l?l?l?l?d?d") - 
coverage.add_option("--checkmasksfile", dest="checkmasks_file", help="check mask coverage in a file", - metavar="masks.hcmask") + coverage.add_option("--checkmasks", dest="checkmasks", + help="check mask coverage", metavar="?u?l?l?l?l?l?d,?l?l?l?l?l?d?d") + coverage.add_option("--checkmasksfile", dest="checkmasks_file", + help="check mask coverage in a file", metavar="masks.hcmask") parser.add_option_group(coverage) - parser.add_option("--showmasks", dest="showmasks", help="Show matching masks", action="store_true", default=False) + parser.add_option("--showmasks", dest="showmasks", + help="Show matching masks", action="store_true", default=False) + + custom = OptionGroup(parser, "Custom charater set options") + custom.add_option("--custom-charset1-len", dest="customcharset1len", + type="int", metavar="26", help="Length of cutom character set 1") + custom.add_option("--custom-charset2-len", dest="customcharset2len", + type="int", metavar="26", help="Length of cutom character set 2") + custom.add_option("--custom-charset3-len", dest="customcharset3len", + type="int", metavar="26", help="Length of cutom character set 3") + custom.add_option("--custom-charset4-len", dest="customcharset4len", + type="int", metavar="26", help="Length of cutom character set 4") + parser.add_option_group(custom) misc = OptionGroup(parser, "Miscellaneous options") - misc.add_option("--pps", dest="pps", help="Passwords per Second", type="int", metavar="1000000000") - misc.add_option("-q", "--quiet", action="store_true", dest="quiet", default=False, help="Don't show headers.") + misc.add_option("--pps", dest="pps", help="Passwords per Second", + type="int", metavar="1000000000") + misc.add_option("-q", "--quiet", action="store_true", + dest="quiet", default=False, help="Don't show headers.") parser.add_option_group(misc) (options, args) = parser.parse_args() # Print program header if not options.quiet: - print - header + print(header) if len(args) < 1: - parser.error("no masks file specified! Please provide statsgen output.") + parser.error( + "no masks file specified! 
Please provide statsgen output.") exit(1) - print - "[*] Analyzing masks in [%s]" % args[0] + print("[*] Analyzing masks in [%s]" % args[0]) maskgen = MaskGen() # Settings - if options.target_time: maskgen.target_time = options.target_time + if options.target_time: + maskgen.target_time = options.target_time if options.output_masks: - print - "[*] Saving generated masks to [%s]" % options.output_masks + print("[*] Saving generated masks to [%s]" % options.output_masks) maskgen.output_file = open(options.output_masks, 'w') # Filters - if options.minlength: maskgen.minlength = options.minlength - if options.maxlength: maskgen.maxlength = options.maxlength - if options.mintime: maskgen.mintime = options.mintime - if options.maxtime: maskgen.maxtime = options.maxtime - if options.mincomplexity: maskgen.mincomplexity = options.mincomplexity - if options.maxcomplexity: maskgen.maxcomplexity = options.maxcomplexity - if options.minoccurrence: maskgen.minoccurrence = options.minoccurrence - if options.maxoccurrence: maskgen.maxoccurrence = options.maxoccurrence + if options.minlength: + maskgen.minlength = options.minlength + if options.maxlength: + maskgen.maxlength = options.maxlength + if options.mintime: + maskgen.mintime = options.mintime + if options.maxtime: + maskgen.maxtime = options.maxtime + if options.mincomplexity: + maskgen.mincomplexity = options.mincomplexity + if options.maxcomplexity: + maskgen.maxcomplexity = options.maxcomplexity + if options.minoccurrence: + maskgen.minoccurrence = options.minoccurrence + if options.maxoccurrence: + maskgen.maxoccurrence = options.maxoccurrence + + # Custom + if options.customcharset1len: + maskgen.customcharset1len = options.customcharset1len + if options.customcharset2len: + maskgen.customcharset2len = options.customcharset2len + if options.customcharset3len: + maskgen.customcharset3len = options.customcharset3len + if options.customcharset4len: + maskgen.customcharset4len = options.customcharset4len # Misc - if options.pps: maskgen.pps = options.pps - if options.showmasks: maskgen.showmasks = options.showmasks + if options.pps: + maskgen.pps = options.pps + if options.showmasks: + maskgen.showmasks = options.showmasks - print - "[*] Using {:,d} keys/sec for calculations.".format(maskgen.pps) + print("[*] Using {:,d} keys/sec for calculations.".format(maskgen.pps)) # Load masks for arg in args: @@ -286,15 +341,15 @@ if __name__ == "__main__": # Matching masks from the command-line if options.checkmasks: checkmasks = [m.strip() for m in options.checkmasks.split(',')] - print - "[*] Checking coverage of the these masks [%s]" % ", ".join(checkmasks) + print("[*] Checking coverage of the these masks [%s]" % + ", ".join(checkmasks)) maskgen.getmaskscoverage(checkmasks) # Matching masks from a file elif options.checkmasks_file: checkmasks_file = open(options.checkmasks_file, 'r') - print - "[*] Checking coverage of masks in [%s]" % options.checkmasks_file + print("[*] Checking coverage of masks in [%s]" % + options.checkmasks_file) maskgen.getmaskscoverage(checkmasks_file) # Printing masks in a file @@ -307,6 +362,5 @@ if __name__ == "__main__": else: sorting_mode = "optindex" - print - "[*] Sorting masks by their [%s]." % sorting_mode + print("[*] Sorting masks by their [%s]." 
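The keyspace arithmetic behind getcomplexity() and the pps figures above, restated as a standalone sketch (the helper below is an illustration, not part of maskgen; ?1-?4 would multiply by the user-supplied custom charset lengths instead)::

    CHARSET_SIZES = {"l": 26, "u": 26, "d": 10, "s": 33,
                     "a": 95, "b": 256, "h": 16, "H": 16}

    def keyspace(mask):
        # "?u?l?l?l?l?d?d" -> 26 * 26**4 * 10**2
        count = 1
        for char in mask.split("?")[1:]:
            count *= CHARSET_SIZES[char]
        return count

    pps = 10 ** 9  # the tool's default guessing speed
    print(keyspace("?u?l?l?l?l?d?d"))        # -> 1188137600
    print(keyspace("?u?l?l?l?l?d?d") / pps)  # ~1.19 seconds of runtime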
         maskgen.generate_masks(sorting_mode)
diff --git a/PACK/policygen.py b/PACK/policygen.py
old mode 100644
new mode 100755
index 8451b3d..a42f2e0
--- a/PACK/policygen.py
+++ b/PACK/policygen.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
 # PolicyGen - Analyze and Generate password masks according to a password policy
 #
 # This tool is part of PACK (Password Analysis and Cracking Kit)
@@ -10,7 +10,9 @@
 #
 # Please see the attached LICENSE file for additional licensing information.

-import sys, string, random
+import sys
+import string
+import random
 import datetime
 from optparse import OptionParser, OptionGroup
 import itertools
@@ -52,7 +54,7 @@ class PolicyGen:
             elif char == "a":
                 count *= 95
             else:
-                print
-                "[!] Error, unknown mask ?%s in a mask %s" % (char, mask)
+                print("[!] Error, unknown mask ?%s in a mask %s" % (char, mask))

         return count
@@ -69,8 +71,8 @@ class PolicyGen:
         sample_complexity = 0

         # TODO: Randomize or even statistically arrange matching masks
-        for length in xrange(self.minlength, self.maxlength + 1):
-            print
-            "[*] Generating %d character password masks." % length
+        for length in range(self.minlength, self.maxlength + 1):
+            print("[*] Generating %d character password masks." % length)
             total_length_count = 0
             sample_length_count = 0
@@ -106,14 +108,14 @@ class PolicyGen:
                 # Filter according to password policy
                 # NOTE: Perform exact opposite (XOR) operation if noncompliant
                 #       flag was set when calling the function.
-                if ((self.minlower == None or lowercount >= self.minlower) and \
-                    (self.maxlower == None or lowercount <= self.maxlower) and \
-                    (self.minupper == None or uppercount >= self.minupper) and \
-                    (self.maxupper == None or uppercount <= self.maxupper) and \
-                    (self.mindigit == None or digitcount >= self.mindigit) and \
-                    (self.maxdigit == None or digitcount <= self.maxdigit) and \
-                    (self.minspecial == None or specialcount >= self.minspecial) and \
-                    (self.maxspecial == None or specialcount <= self.maxspecial)) ^ noncompliant:
+                if ((self.minlower == None or lowercount >= self.minlower) and
+                    (self.maxlower == None or lowercount <= self.maxlower) and
+                    (self.minupper == None or uppercount >= self.minupper) and
+                    (self.maxupper == None or uppercount <= self.maxupper) and
+                    (self.mindigit == None or digitcount >= self.mindigit) and
+                    (self.maxdigit == None or digitcount <= self.maxdigit) and
+                    (self.minspecial == None or specialcount >= self.minspecial) and
+                        (self.maxspecial == None or specialcount <= self.maxspecial)) ^ noncompliant:

                     sample_length_count += 1
                     sample_length_complexity += mask_complexity
@@ -122,10 +124,9 @@ class PolicyGen:
                         mask_time = mask_complexity / self.pps
                         time_human = ">1 year" if mask_time > 60 * 60 * 24 * 365 else str(
                             datetime.timedelta(seconds=mask_time))
-                        print
-                        "[{:>2}] {:<30} [l:{:>2} u:{:>2} d:{:>2} s:{:>2}] [{:>8}]  ".format(length, mask, lowercount,
-                                                                                            uppercount, digitcount,
-                                                                                            specialcount, time_human)
+                        print("[{:>2}] {:<30} [l:{:>2} u:{:>2} d:{:>2} s:{:>2}] [{:>8}]  ".format(length, mask, lowercount,
+                                                                                                  uppercount, digitcount,
+                                                                                                  specialcount, time_human))

                     if self.output_file:
                         self.output_file.write("%s\n" % mask)
@@ -137,15 +138,14 @@ class PolicyGen:
             sample_complexity += sample_length_complexity

         total_time = total_complexity / self.pps
-        total_time_human = ">1 year" if total_time > 60 * 60 * 24 * 365 else str(datetime.timedelta(seconds=total_time))
-        print
-        "[*] Total Masks: %d Time: %s" % (total_count, total_time_human)
+        total_time_human = ">1 year" if total_time > 60 * 60 * 24 * \
+            365 else str(datetime.timedelta(seconds=total_time))
+        print("[*] Total Masks: %d Time: %s" % (total_count, total_time_human))

         sample_time = sample_complexity / self.pps
         sample_time_human = ">1 year" if sample_time > 60 * 60 * 24 * 365 else str(
             datetime.timedelta(seconds=sample_time))
-        print
-        "[*] Policy Masks: %d Time: %s" % (sample_count, sample_time_human)
+        print("[*] Policy Masks: %d Time: %s" % (sample_count, sample_time_human))


 if __name__ == "__main__":
@@ -161,10 +161,14 @@ if __name__ == "__main__":
     header += "\n"

     # parse command line arguments
-    parser = OptionParser("%prog [options]\n\nType --help for more options", version="%prog " + VERSION)
-    parser.add_option("-o", "--outputmasks", dest="output_masks", help="Save masks to a file", metavar="masks.hcmask")
-    parser.add_option("--pps", dest="pps", help="Passwords per Second", type="int", metavar="1000000000")
-    parser.add_option("--showmasks", dest="showmasks", help="Show matching masks", action="store_true", default=False)
+    parser = OptionParser(
+        "%prog [options]\n\nType --help for more options", version="%prog " + VERSION)
+    parser.add_option("-o", "--outputmasks", dest="output_masks",
+                      help="Save masks to a file", metavar="masks.hcmask")
+    parser.add_option("--pps", dest="pps", help="Passwords per Second",
+                      type="int", metavar="1000000000")
+    parser.add_option("--showmasks", dest="showmasks",
+                      help="Show matching masks", action="store_true", default=False)

     parser.add_option("--noncompliant", dest="noncompliant",
                       help="Generate masks for noncompliant passwords", action="store_true", default=False)
@@ -174,14 +178,16 @@ if __name__ == "__main__":
                      help="Minimum password length")
     group.add_option("--maxlength", dest="maxlength", type="int", metavar="8", default=8,
                      help="Maximum password length")
-    group.add_option("--mindigit", dest="mindigit", type="int", metavar="1", help="Minimum number of digits")
+    group.add_option("--mindigit", dest="mindigit", type="int",
+                     metavar="1", help="Minimum number of digits")
     group.add_option("--minlower", dest="minlower", type="int", metavar="1",
                      help="Minimum number of lower-case characters")
     group.add_option("--minupper", dest="minupper", type="int", metavar="1",
                      help="Minimum number of upper-case characters")
     group.add_option("--minspecial", dest="minspecial", type="int", metavar="1",
                      help="Minimum number of special characters")
-    group.add_option("--maxdigit", dest="maxdigit", type="int", metavar="3", help="Maximum number of digits")
+    group.add_option("--maxdigit", dest="maxdigit", type="int",
+                     metavar="3", help="Maximum number of digits")
     group.add_option("--maxlower", dest="maxlower", type="int", metavar="3",
                      help="Maximum number of lower-case characters")
     group.add_option("--maxupper", dest="maxupper", type="int", metavar="3",
@@ -190,54 +196,62 @@ if __name__ == "__main__":
                      help="Maximum number of special characters")
     parser.add_option_group(group)

-    parser.add_option("-q", "--quiet", action="store_true", dest="quiet", default=False, help="Don't show headers.")
+    parser.add_option("-q", "--quiet", action="store_true",
+                      dest="quiet", default=False, help="Don't show headers.")

     (options, args) = parser.parse_args()

     # Print program header
     if not options.quiet:
-        print
-        header
+        print(header)

     policygen = PolicyGen()

-    # Settings 
+    # Settings
     if options.output_masks:
-        print
-        "[*] Saving generated masks to [%s]" % options.output_masks
+        print("[*] Saving generated masks to [%s]" % options.output_masks)
         policygen.output_file = open(options.output_masks, 'w')

     # Password policy
-    if options.minlength != None: policygen.minlength = options.minlength
-    if options.maxlength != None: policygen.maxlength = options.maxlength
-    if options.mindigit != None: policygen.mindigit = options.mindigit
-    if options.minlower != None: policygen.minlower = options.minlower
-    if options.minupper != None: policygen.minupper = options.minupper
-    if options.minspecial != None: policygen.minspecial = options.minspecial
-    if options.maxdigit != None: policygen.maxdigits = options.maxdigit
-    if options.maxlower != None: policygen.maxlower = options.maxlower
-    if options.maxupper != None: policygen.maxupper = options.maxupper
-    if options.maxspecial != None: policygen.maxspecial = options.maxspecial
+    if options.minlength != None:
+        policygen.minlength = options.minlength
+    if options.maxlength != None:
+        policygen.maxlength = options.maxlength
+    if options.mindigit != None:
+        policygen.mindigit = options.mindigit
+    if options.minlower != None:
+        policygen.minlower = options.minlower
+    if options.minupper != None:
+        policygen.minupper = options.minupper
+    if options.minspecial != None:
+        policygen.minspecial = options.minspecial
+    if options.maxdigit != None:
+        policygen.maxdigit = options.maxdigit
+    if options.maxlower != None:
+        policygen.maxlower = options.maxlower
+    if options.maxupper != None:
+        policygen.maxupper = options.maxupper
+    if options.maxspecial != None:
+        policygen.maxspecial = options.maxspecial

     # Misc
-    if options.pps: policygen.pps = options.pps
-    if options.showmasks: policygen.showmasks = options.showmasks
+    if options.pps:
+        policygen.pps = options.pps
+    if options.showmasks:
+        policygen.showmasks = options.showmasks

-    print
-    "[*] Using {:,d} keys/sec for calculations.".format(policygen.pps)
+    print("[*] Using {:,d} keys/sec for calculations.".format(policygen.pps))

     # Print current password policy
-    print
-    "[*] Password policy:"
-    print
-    "    Pass Lengths: min:%d max:%d" % (policygen.minlength, policygen.maxlength)
-    print
-    "    Min strength: l:%s u:%s d:%s s:%s" % (
-        policygen.minlower, policygen.minupper, policygen.mindigit, policygen.minspecial)
-    print
-    "    Max strength: l:%s u:%s d:%s s:%s" % (
-        policygen.maxlower, policygen.maxupper, policygen.maxdigit, policygen.maxspecial)
+    print("[*] Password policy:")
+    print("    Pass Lengths: min:%d max:%d" % (
+        policygen.minlength, policygen.maxlength))
+    print("    Min strength: l:%s u:%s d:%s s:%s" % (
+        policygen.minlower, policygen.minupper, policygen.mindigit, policygen.minspecial))
+    print("    Max strength: l:%s u:%s d:%s s:%s" % (
+        policygen.maxlower, policygen.maxupper, policygen.maxdigit, policygen.maxspecial))

-    print
-    "[*] Generating [%s] masks." % ("compliant" if not options.noncompliant else "non-compliant")
+    print("[*] Generating [%s] masks." % (
+        "compliant" if not options.noncompliant else "non-compliant"))

     policygen.generate_masks(options.noncompliant)
diff --git a/PACK/rulegen.py b/PACK/rulegen.py
deleted file mode 100644
index b49e816..0000000
--- a/PACK/rulegen.py
+++ /dev/null
@@ -1,1191 +0,0 @@
-#!/usr/bin/env python
-# Rulegen.py - Advanced automated password rule and wordlist generator for the
-#              Hashcat password cracker using the Levenshtein Reverse Path
-#              algorithm and Enchant spell checking library.
-#
-# This tool is part of PACK (Password Analysis and Cracking Kit)
-#
-# VERSION 0.0.3
-#
-# Copyright (C) 2013 Peter Kacherginsky
-# All rights reserved.
-#
-# Please see the attached LICENSE file for additional licensing information.
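The deleted rulegen.py below is organized around a table of hashcat-compatible rule operations (the hashcat_rule dictionary in its __init__), which the later methods compose until a dictionary word has been transformed into the observed password. As a minimal, self-contained sketch of that idea, covering only a handful of operations and illustrative only (this is not rulegen's actual engine):

    # Sketch of hashcat-style rule application; the full operation table
    # appears in the deleted __init__ below.
    RULES = {
        ':': lambda w: w,               # do nothing
        'l': lambda w: w.lower(),       # lowercase all letters
        'u': lambda w: w.upper(),       # uppercase all letters
        'c': lambda w: w.capitalize(),  # capitalize the first letter
        'r': lambda w: w[::-1],         # reverse the entire word
        'd': lambda w: w + w,           # duplicate the entire word
    }

    def apply_rule(word, rule):
        """Apply a space-separated rule string such as 'c $1 $2'."""
        for op in rule.split():
            if op.startswith('$'):      # append a character to the end
                word = word + op[1]
            elif op.startswith('^'):    # prepend a character to the front
                word = op[1] + word
            else:
                word = RULES[op](word)
        return word

    assert apply_rule("password", "c $1 $2") == "Password12"

Rulegen's real table additionally handles positional operations (insert, delete, overwrite at offset N), which is what lets it encode an arbitrary Levenshtein edit path as a rule string.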
-
-import sys
-import re
-import time
-import operator
-import enchant
-
-from optparse import OptionParser, OptionGroup
-
-from collections import Counter
-
-import subprocess
-
-import multiprocessing
-
-VERSION = "0.0.3"
-
-# Testing rules with hashcat --stdout
-HASHCAT_PATH = "hashcat/"
-
-
-# Rule Generator class responsible for the complete cycle of rule generation
-class RuleGen:
-    # Initialize Rule Generator class
-    def __init__(self, language="en", providers="aspell,myspell", basename='analysis', threads=4):
-
-        self.enchant_broker = enchant.Broker()
-        self.enchant_broker.set_ordering("*", providers)
-
-        self.enchant = enchant.Dict(language, self.enchant_broker)
-
-        # Output options
-        self.basename = basename
-
-        # Finetuning word generation
-        self.max_word_dist = 10
-        self.max_words = 10
-        self.more_words = False
-        self.simple_words = False
-
-        # Finetuning rule generation
-        self.max_rule_len = 10
-        self.max_rules = 10
-        self.more_rules = False
-        self.simple_rules = False
-        self.brute_rules = False
-
-        # Debugging options
-        self.verbose = False
-        self.debug = False
-        self.word = None  # Custom word to use.
-        self.quiet = False
-
-        ########################################################################
-        # Word and Rule Statistics
-        self.numeric_stats_total = 0
-        self.special_stats_total = 0
-        self.foreign_stats_total = 0
-
-        ########################################################################
-        # Preanalysis Password Patterns
-        self.password_pattern = dict()
-        self.password_pattern["insertion"] = re.compile('^[^a-z]*(?P<password>.+?)[^a-z]*$', re.IGNORECASE)
-        self.password_pattern["email"] = re.compile('^(?P<password>.+?)@[A-Z0-9.-]+\.[A-Z]{2,4}', re.IGNORECASE)
-        self.password_pattern["alldigits"] = re.compile('^(\d+)$', re.IGNORECASE)
-        self.password_pattern["allspecial"] = re.compile('^([^a-z0-9]+)$', re.IGNORECASE)
-
-        ########################################################################
-        # Hashcat Rules Engine
-        self.hashcat_rule = dict()
-
-        # Dummy rule
-        self.hashcat_rule[':'] = lambda x: x  # Do nothing
-
-        # Case rules
-        self.hashcat_rule["l"] = lambda x: x.lower()  # Lowercase all letters
-        self.hashcat_rule["u"] = lambda x: x.upper()  # Capitalize all letters
-        self.hashcat_rule["c"] = lambda x: x.capitalize()  # Capitalize the first letter
-        self.hashcat_rule["C"] = lambda x: x[0].lower() + x[
-                                                          1:].upper()  # Lowercase the first found character, uppercase the rest
-        self.hashcat_rule["t"] = lambda x: x.swapcase()  # Toggle the case of all characters in word
-        self.hashcat_rule["T"] = lambda x, y: x[:y] + x[y].swapcase() + x[
-                                                                       y + 1:]  # Toggle the case of characters at position N
-        self.hashcat_rule["E"] = lambda x: " ".join(
-            [i[0].upper() + i[1:] for i in x.split(" ")])  # Upper case the first letter and every letter after a space
-
-        # Rotation rules
-        self.hashcat_rule["r"] = lambda x: x[::-1]  # Reverse the entire word
-        self.hashcat_rule["{"] = lambda x: x[1:] + x[0]  # Rotate the word left
-        self.hashcat_rule["}"] = lambda x: x[-1] + x[:-1]  # Rotate the word right
-
-        # Duplication rules
-        self.hashcat_rule["d"] = lambda x: x + x  # Duplicate entire word
-        self.hashcat_rule["p"] = lambda x, y: x * y  # Duplicate entire word N times
-        self.hashcat_rule["f"] = lambda x: x + x[::-1]  # Duplicate word reversed
-        self.hashcat_rule["z"] = lambda x, y: x[0] * y + x  # Duplicate first character N times
-        self.hashcat_rule["Z"] = lambda x, y: x + x[-1] * y  # Duplicate last character N times
-        self.hashcat_rule["q"] = lambda x: "".join([i + i for i in x])  # Duplicate every character
-
self.hashcat_rule["y"] = lambda x, y: x[:y] + x # Duplicate first N characters - self.hashcat_rule["Y"] = lambda x, y: x + x[-y:] # Duplicate last N characters - - # Cutting rules - self.hashcat_rule["["] = lambda x: x[1:] # Delete first character - self.hashcat_rule["]"] = lambda x: x[:-1] # Delete last character - self.hashcat_rule["D"] = lambda x, y: x[:y] + x[y + 1:] # Deletes character at position N - self.hashcat_rule["'"] = lambda x, y: x[:y] # Truncate word at position N - self.hashcat_rule["x"] = lambda x, y, z: x[:y] + x[y + z:] # Delete M characters, starting at position N - self.hashcat_rule["@"] = lambda x, y: x.replace(y, '') # Purge all instances of X - - # Insertion rules - self.hashcat_rule["$"] = lambda x, y: x + y # Append character to end - self.hashcat_rule["^"] = lambda x, y: y + x # Prepend character to front - self.hashcat_rule["i"] = lambda x, y, z: x[:y] + z + x[y:] # Insert character X at position N - - # Replacement rules - self.hashcat_rule["o"] = lambda x, y, z: x[:y] + z + x[y + 1:] # Overwrite character at position N with X - self.hashcat_rule["s"] = lambda x, y, z: x.replace(y, z) # Replace all instances of X with Y - self.hashcat_rule["L"] = lambda x, y: x[:y] + chr(ord(x[y]) << 1) + x[ - y + 1:] # Bitwise shift left character @ N - self.hashcat_rule["R"] = lambda x, y: x[:y] + chr(ord(x[y]) >> 1) + x[ - y + 1:] # Bitwise shift right character @ N - self.hashcat_rule["+"] = lambda x, y: x[:y] + chr(ord(x[y]) + 1) + x[ - y + 1:] # Increment character @ N by 1 ascii value - self.hashcat_rule["-"] = lambda x, y: x[:y] + chr(ord(x[y]) - 1) + x[ - y + 1:] # Decrement character @ N by 1 ascii value - self.hashcat_rule["."] = lambda x, y: x[:y] + x[y + 1] + x[ - y + 1:] # Replace character @ N with value at @ N plus 1 - self.hashcat_rule[","] = lambda x, y: x[:y] + x[y - 1] + x[ - y + 1:] # Replace character @ N with value at @ N minus 1 - - # Swappping rules - self.hashcat_rule["k"] = lambda x: x[1] + x[0] + x[2:] # Swap first two characters - self.hashcat_rule["K"] = lambda x: x[:-2] + x[-1] + x[-2] # Swap last two characters - self.hashcat_rule["*"] = lambda x, y, z: x[:y] + x[z] + x[y + 1:z] + x[y] + x[z + 1:] if z > y else x[:z] + x[ - y] + x[z + 1:y] + x[z] + x[y + 1:] # Swap character X with Y - - ######################################################################## - # Common numeric and special character substitutions (1337 5p34k) - self.leet = dict() - self.leet["1"] = "i" - self.leet["2"] = "z" - self.leet["3"] = "e" - self.leet["4"] = "a" - self.leet["5"] = "s" - self.leet["6"] = "b" - self.leet["7"] = "t" - self.leet["8"] = "b" - self.leet["9"] = "g" - self.leet["0"] = "o" - self.leet["!"] = "i" - self.leet["|"] = "i" - self.leet["@"] = "a" - self.leet["$"] = "s" - self.leet["+"] = "t" - - ######################################################################## - # Preanalysis rules to bruteforce for each word - self.preanalysis_rules = [] - self.preanalysis_rules.append(([], self.hashcat_rule[':'])) # Blank rule - self.preanalysis_rules.append((['r'], self.hashcat_rule['r'])) # Reverse rule - # self.preanalysis_rules.append((['{'],self.hashcat_rule['}'])) # Rotate left - # self.preanalysis_rules.append((['}'],self.hashcat_rule['{'])) # Rotate right - - ############################################################################ - # Calculate Levenshtein edit path matrix - def levenshtein(self, word, password): - matrix = [] - - # Generate and populate the initial matrix - for i in xrange(len(password) + 1): - matrix.append([]) - for j in 
xrange(len(word) + 1): - if i == 0: - matrix[i].append(j) - elif j == 0: - matrix[i].append(i) - else: - matrix[i].append(0) - - # Calculate edit distance for each substring - for i in xrange(1, len(password) + 1): - for j in xrange(1, len(word) + 1): - if password[i - 1] == word[j - 1]: - matrix[i][j] = matrix[i - 1][j - 1] - else: - insertion = matrix[i - 1][j] + 1 - deletion = matrix[i][j - 1] + 1 - substitution = matrix[i - 1][j - 1] + 1 - matrix[i][j] = min(insertion, deletion, substitution) - - return matrix - - def levenshtein_distance(self, s1, s2): - """Calculate the Levenshtein distance between two strings. - - This is straight from Wikipedia. - """ - if len(s1) < len(s2): - return self.levenshtein_distance(s2, s1) - if not s1: - return len(s2) - - previous_row = xrange(len(s2) + 1) - for i, c1 in enumerate(s1): - current_row = [i + 1] - for j, c2 in enumerate(s2): - insertions = previous_row[j + 1] + 1 - deletions = current_row[j] + 1 - substitutions = previous_row[j] + (c1 != c2) - current_row.append(min(insertions, deletions, substitutions)) - previous_row = current_row - - return previous_row[-1] - - def levenshtein_print(self, matrix, word, password): - """ Print word X password matrix """ - print " %s" % " ".join(list(word)) - for i, row in enumerate(matrix): - if i == 0: - print " ", - else: - print password[i - 1], - print " ".join("%2d" % col for col in row) - - def generate_levenshtein_rules(self, word, password): - """ Generates levenshtein rules. Returns a list of lists of levenshtein rules. """ - - # 1) Generate Levenshtein matrix - matrix = self.levenshtein(word, password) - - # 2) Trace reverse paths through the matrix. - paths = self.levenshtein_reverse_recursive(matrix, len(matrix) - 1, len(matrix[0]) - 1, 0) - - # 3) Return a collection of reverse paths. - return [path for path in paths if len(path) <= matrix[-1][-1]] - - def levenshtein_reverse_recursive(self, matrix, i, j, path_len): - """ Calculate reverse Levenshtein paths. - Recursive, Depth First, Short-circuited algorithm by Peter Kacherginsky - Generates a list of edit operations necessary to transform a source word - into a password. Edit operations are recorded in the form: - (operation, password_offset, word_offset) - Where an operation can be either insertion, deletion or replacement. 
- """ - - if i == 0 and j == 0 or path_len > matrix[-1][-1]: - return [[]] - else: - paths = list() - - cost = matrix[i][j] - - # Calculate minimum cost of each operation - cost_delete = cost_insert = cost_equal_or_replace = sys.maxint - if i > 0: cost_insert = matrix[i - 1][j] - if j > 0: cost_delete = matrix[i][j - 1] - if i > 0 and j > 0: cost_equal_or_replace = matrix[i - 1][j - 1] - cost_min = min(cost_delete, cost_insert, cost_equal_or_replace) - - # Recurse through reverse path for each operation - if cost_insert == cost_min: - insert_paths = self.levenshtein_reverse_recursive(matrix, i - 1, j, path_len + 1) - for insert_path in insert_paths: paths.append(insert_path + [('insert', i - 1, j)]) - - if cost_delete == cost_min: - delete_paths = self.levenshtein_reverse_recursive(matrix, i, j - 1, path_len + 1) - for delete_path in delete_paths: paths.append(delete_path + [('delete', i, j - 1)]) - - if cost_equal_or_replace == cost_min: - if cost_equal_or_replace == cost: - equal_paths = self.levenshtein_reverse_recursive(matrix, i - 1, j - 1, path_len) - for equal_path in equal_paths: paths.append(equal_path) - else: - replace_paths = self.levenshtein_reverse_recursive(matrix, i - 1, j - 1, path_len + 1) - for replace_path in replace_paths: paths.append(replace_path + [('replace', i - 1, j - 1)]) - - return paths - - def load_custom_wordlist(self, wordlist_file): - self.enchant = enchant.request_pwl_dict(wordlist_file) - - def generate_words(self, password): - """ Generate source word candidates.""" - - if self.debug: print - "[*] Generating source words for %s" % password - - words = list() - words_collection = list() - - # Let's collect best edit distance as soon as possible to prevent - # less efficient pre_rules like reversal and rotation from slowing - # us down with garbage - best_found_distance = 9999 - - ####################################################################### - # Generate words for each preanalysis rule - if not self.brute_rules: - self.preanalysis_rules = self.preanalysis_rules[:1] - - for pre_rule, pre_rule_lambda in self.preanalysis_rules: - - pre_password = pre_rule_lambda(password) - - # Generate word suggestions - if self.word: - suggestions = [self.word] - elif self.simple_words: - suggestions = self.generate_simple_words(pre_password) - else: - suggestions = self.generate_advanced_words(pre_password) - - # HACK: Perform some additional expansion on multi-word suggestions - # TODO: May be I should split these two and see if I can generate - # rules for each of the suggestions - for suggestion in suggestions[:self.max_words]: - suggestion = suggestion.replace(' ', '') - suggestion = suggestion.replace('-', '') - if not suggestion in suggestions: - suggestions.append(suggestion) - - if len(suggestions) != len(set(suggestions)): - print - sorted(suggestions) - print - sorted(set(suggestions)) - - for suggestion in suggestions: - distance = self.levenshtein_distance(suggestion, pre_password) - - word = dict() - word["suggestion"] = suggestion - word["distance"] = distance - word["password"] = pre_password - word["pre_rule"] = pre_rule - word["best_rule_length"] = 9999 - - words.append(word) - - ####################################################################### - # Perform Optimization - for word in sorted(words, key=lambda word: word["distance"], reverse=False): - - # Optimize for best distance - if not self.more_words: - if word["distance"] < best_found_distance: - best_found_distance = word["distance"] - - elif word["distance"] > best_found_distance: - 
if self.verbose: - print - "[-] %s => {edit distance suboptimal: %d (%d)} => %s" % \ - (word["suggestion"], word["distance"], best_found_distance, word["password"]) - break - - # Filter words with too big edit distance - if word["distance"] <= self.max_word_dist: - if self.debug: - print - "[+] %s => {edit distance: %d (%d)} = > %s" % \ - (word["suggestion"], word["distance"], best_found_distance, word["password"]) - - words_collection.append(word) - - else: - if self.verbose: - print - "[-] %s => {max distance exceeded: %d (%d)} => %s" % \ - (word["suggestion"], word["distance"], self.max_word_dist, word["password"]) - - if self.max_words: - words_collection = words_collection[:self.max_words] - - return words_collection - - def generate_simple_words(self, password): - """ Generate simple words. A simple spellcheck.""" - - return self.enchant.suggest(password) - - def generate_advanced_words(self, password): - """ Generate advanced words. - Perform some additional non-destructive cleaning to help spell-checkers: - 1) Remove non-alpha prefixes and appendixes. - 2) Perform common pattern matches (e.g. email). - 3) Replace non-alpha character substitutions (1337 5p34k) - """ - - # Remove non-alpha prefix and/or appendix - insertion_matches = self.password_pattern["insertion"].match(password) - if insertion_matches: - password = insertion_matches.group('password') - - # Pattern matches - email_matches = self.password_pattern["email"].match(password) - if email_matches: - password = email_matches.group('password') - - # Replace common special character replacements (1337 5p34k) - preanalysis_password = '' - for c in password: - if c in self.leet: - preanalysis_password += self.leet[c] - else: - preanalysis_password += c - password = preanalysis_password - - if self.debug: "[*] Preanalysis Password: %s" % password - - return self.enchant.suggest(password) - - ############################################################################ - # Hashcat specific offset definition 0-9,A-Z - def int_to_hashcat(self, N): - if N < 10: - return N - else: - return chr(65 + N - 10) - - def hashcat_to_int(self, N): - if N.isdigit(): - return int(N) - else: - return ord(N) - 65 + 10 - - def generate_hashcat_rules(self, suggestion, password): - """ Generate hashcat rules. Returns a length sorted list of lists of hashcat rules.""" - - # 2) Generate Levenshtein Rules - lev_rules = self.generate_levenshtein_rules(suggestion, password) - - # 3) Generate Hashcat Rules - hashcat_rules = [] - hashcat_rules_collection = [] - - ####################################################################### - # Generate hashcat rule for each levenshtein rule - for lev_rule in lev_rules: - - if self.simple_rules: - hashcat_rule = self.generate_simple_hashcat_rules(suggestion, lev_rule, password) - else: - hashcat_rule = self.generate_advanced_hashcat_rules(suggestion, lev_rule, password) - - if hashcat_rule == None: - print - "[!] 
Processing FAILED: %s => ;( => %s" % (suggestion, password) - print - " Sorry about that, please report this failure to" - print - " the developer: iphelix [at] thesprawl.org" - - else: - hashcat_rules.append(hashcat_rule) - - best_found_rule_length = 9999 - - ####################################################################### - # Perform Optimization - for hashcat_rule in sorted(hashcat_rules, key=lambda hashcat_rule: len(hashcat_rule)): - - rule_length = len(hashcat_rule) - - if not self.more_rules: - if rule_length < best_found_rule_length: - best_found_rule_length = rule_length - - elif rule_length > best_found_rule_length: - if self.verbose: - print - "[-] %s => {best rule length exceeded: %d (%d)} => %s" % \ - (suggestion, rule_length, best_found_rule_length, password) - break - - if rule_length <= self.max_rule_len: - hashcat_rules_collection.append(hashcat_rule) - - return hashcat_rules_collection - - def generate_simple_hashcat_rules(self, word, rules, password): - """ Generate basic hashcat rules using only basic insert,delete,replace rules. """ - hashcat_rules = [] - - if self.debug: print - "[*] Simple Processing %s => %s" % (word, password) - - # Dynamically apply rules to the source word - # NOTE: Special case were word == password this would work as well. - word_rules = word - - for (op, p, w) in rules: - - if self.debug: print - "\t[*] Simple Processing Started: %s - %s" % (word_rules, " ".join(hashcat_rules)) - - if op == 'insert': - hashcat_rules.append("i%s%s" % (self.int_to_hashcat(p), password[p])) - word_rules = self.hashcat_rule['i'](word_rules, p, password[p]) - - elif op == 'delete': - hashcat_rules.append("D%s" % self.int_to_hashcat(p)) - word_rules = self.hashcat_rule['D'](word_rules, p) - - elif op == 'replace': - hashcat_rules.append("o%s%s" % (self.int_to_hashcat(p), password[p])) - word_rules = self.hashcat_rule['o'](word_rules, p, password[p]) - - if self.debug: print - "\t[*] Simple Processing Ended: %s => %s => %s" % (word_rules, " ".join(hashcat_rules), password) - - # Check if rules result in the correct password - if word_rules == password: - return hashcat_rules - else: - if self.debug: print - "[!] Simple Processing FAILED: %s => %s => %s (%s)" % (word, " ".join(hashcat_rules), password, word_rules) - return None - - def generate_advanced_hashcat_rules(self, word, rules, password): - """ Generate advanced hashcat rules using full range of available rules. """ - hashcat_rules = [] - - if self.debug: print - "[*] Advanced Processing %s => %s" % (word, password) - - # Dynamically apply and store rules in word_rules variable. - # NOTE: Special case where word == password this would work as well. - word_rules = word - - # Generate case statistics - password_lower = len([c for c in password if c.islower()]) - password_upper = len([c for c in password if c.isupper()]) - - for i, (op, p, w) in enumerate(rules): - - if self.debug: print - "\t[*] Advanced Processing Started: %s - %s" % (word_rules, " ".join(hashcat_rules)) - - if op == 'insert': - hashcat_rules.append("i%s%s" % (self.int_to_hashcat(p), password[p])) - word_rules = self.hashcat_rule['i'](word_rules, p, password[p]) - - elif op == 'delete': - hashcat_rules.append("D%s" % self.int_to_hashcat(p)) - word_rules = self.hashcat_rule['D'](word_rules, p) - - elif op == 'replace': - - # Detecting global replacement such as sXY, l, u, C, c is a non - # trivial problem because different characters may be added or - # removed from the word by other rules. 
A reliable way to solve - # this problem is to apply all of the rules the source word - # and keep track of its state at any given time. At the same - # time, global replacement rules can be tested by completing - # the rest of the rules using a simplified engine. - - # The sequence of if statements determines the priority of rules - - # This rule was made obsolete by a prior global replacement - if word_rules[p] == password[p]: - if self.debug: print - "\t[*] Advanced Processing Obsolete Rule: %s - %s" % (word_rules, " ".join(hashcat_rules)) - - # Swapping rules - elif p < len(password) - 1 and p < len(word_rules) - 1 and word_rules[p] == password[p + 1] and - word_rules[p + 1] == password[p]: - # Swap first two characters - if p == 0 and self.generate_simple_hashcat_rules(self.hashcat_rule['k'](word_rules), rules[i + 1:], - password): - hashcat_rules.append("k") - word_rules = self.hashcat_rule['k'](word_rules) - # Swap last two characters - elif p == len(word_rules) - 2 and self.generate_simple_hashcat_rules( - self.hashcat_rule['K'](word_rules), rules[i + 1:], password): - hashcat_rules.append("K") - word_rules = self.hashcat_rule['K'](word_rules) - # Swap any two characters (only adjacent swapping is supported) - elif self.generate_simple_hashcat_rules(self.hashcat_rule['*'](word_rules, p, p + 1), rules[i + 1:], - password): - hashcat_rules.append("*%s%s" % (self.int_to_hashcat(p), self.int_to_hashcat(p + 1))) - word_rules = self.hashcat_rule['*'](word_rules, p, p + 1) - else: - hashcat_rules.append("o%s%s" % (self.int_to_hashcat(p), password[p])) - word_rules = self.hashcat_rule['o'](word_rules, p, password[p]) - - # Case Toggle: Uppercased a letter - elif word_rules[p].islower() and word_rules[p].upper() == password[ - p]: # Toggle the case of all characters in word (mixed cases) - if password_upper and password_lower and self.generate_simple_hashcat_rules( - self.hashcat_rule['t'](word_rules), rules[i + 1:], password): - hashcat_rules.append("t") - word_rules = self.hashcat_rule['t'](word_rules) - - # Capitalize all letters - elif self.generate_simple_hashcat_rules(self.hashcat_rule['u'](word_rules), rules[i + 1:], - password): - hashcat_rules.append("u") - word_rules = self.hashcat_rule['u'](word_rules) - - # Capitalize the first letter - elif p == 0 and self.generate_simple_hashcat_rules(self.hashcat_rule['c'](word_rules), - rules[i + 1:], password): - hashcat_rules.append("c") - word_rules = self.hashcat_rule['c'](word_rules) - - # Toggle the case of characters at position N - else: - hashcat_rules.append("T%s" % self.int_to_hashcat(p)) - word_rules = self.hashcat_rule['T'](word_rules, p) - - # Case Toggle: Lowercased a letter - elif word_rules[p].isupper() and word_rules[p].lower() == password[p]: - - # Toggle the case of all characters in word (mixed cases) - if password_upper and password_lower and self.generate_simple_hashcat_rules( - self.hashcat_rule['t'](word_rules), rules[i + 1:], password): - hashcat_rules.append("t") - word_rules = self.hashcat_rule['t'](word_rules) - - # Lowercase all letters - elif self.generate_simple_hashcat_rules(self.hashcat_rule['l'](word_rules), rules[i + 1:], - password): - hashcat_rules.append("l") - word_rules = self.hashcat_rule['l'](word_rules) - - # Lowercase the first found character, uppercase the rest - elif p == 0 and self.generate_simple_hashcat_rules(self.hashcat_rule['C'](word_rules), - rules[i + 1:], password): - hashcat_rules.append("C") - word_rules = self.hashcat_rule['C'](word_rules) - - # Toggle the case of characters at 
position N - else: - hashcat_rules.append("T%s" % self.int_to_hashcat(p)) - word_rules = self.hashcat_rule['T'](word_rules, p) - - # Special case substitution of 'all' instances (1337 $p34k) - elif word_rules[p].isalpha() and not password[p].isalpha() and self.generate_simple_hashcat_rules( - self.hashcat_rule['s'](word_rules, word_rules[p], password[p]), rules[i + 1:], password): - - # If we have already detected this rule, then skip it thus - # reducing total rule count. - # BUG: Elisabeth => sE3 sl1 u o3Z sE3 => 31IZAB3TH - # if not "s%s%s" % (word_rules[p],password[p]) in hashcat_rules: - hashcat_rules.append("s%s%s" % (word_rules[p], password[p])) - word_rules = self.hashcat_rule['s'](word_rules, word_rules[p], password[p]) - - # Replace next character with current - elif p < len(password) - 1 and p < len(word_rules) - 1 and password[p] == password[p + 1] and password[ - p] == word_rules[p + 1]: - hashcat_rules.append(".%s" % self.int_to_hashcat(p)) - word_rules = self.hashcat_rule['.'](word_rules, p) - - # Replace previous character with current - elif p > 0 and w > 0 and password[p] == password[p - 1] and password[p] == word_rules[p - 1]: - hashcat_rules.append(",%s" % self.int_to_hashcat(p)) - word_rules = self.hashcat_rule[','](word_rules, p) - - # ASCII increment - elif ord(word_rules[p]) + 1 == ord(password[p]): - hashcat_rules.append("+%s" % self.int_to_hashcat(p)) - word_rules = self.hashcat_rule['+'](word_rules, p) - - # ASCII decrement - elif ord(word_rules[p]) - 1 == ord(password[p]): - hashcat_rules.append("-%s" % self.int_to_hashcat(p)) - word_rules = self.hashcat_rule['-'](word_rules, p) - - # SHIFT left - elif ord(word_rules[p]) << 1 == ord(password[p]): - hashcat_rules.append("L%s" % self.int_to_hashcat(p)) - word_rules = self.hashcat_rule['L'](word_rules, p) - - # SHIFT right - elif ord(word_rules[p]) >> 1 == ord(password[p]): - hashcat_rules.append("R%s" % self.int_to_hashcat(p)) - word_rules = self.hashcat_rule['R'](word_rules, p) - - # Position based replacements. 
- else: - hashcat_rules.append("o%s%s" % (self.int_to_hashcat(p), password[p])) - word_rules = self.hashcat_rule['o'](word_rules, p, password[p]) - - if self.debug: print - "\t[*] Advanced Processing Ended: %s %s" % (word_rules, " ".join(hashcat_rules)) - - ######################################################################## - # Prefix rules - last_prefix = 0 - prefix_rules = list() - for hashcat_rule in hashcat_rules: - if hashcat_rule[0] == "i" and self.hashcat_to_int(hashcat_rule[1]) == last_prefix: - prefix_rules.append("^%s" % hashcat_rule[2]) - last_prefix += 1 - elif len(prefix_rules): - hashcat_rules = prefix_rules[::-1] + hashcat_rules[len(prefix_rules):] - break - else: - break - else: - hashcat_rules = prefix_rules[::-1] + hashcat_rules[len(prefix_rules):] - - #################################################################### - # Appendix rules - last_appendix = len(password) - 1 - appendix_rules = list() - for hashcat_rule in hashcat_rules[::-1]: - if hashcat_rule[0] == "i" and self.hashcat_to_int(hashcat_rule[1]) == last_appendix: - appendix_rules.append("$%s" % hashcat_rule[2]) - last_appendix -= 1 - elif len(appendix_rules): - hashcat_rules = hashcat_rules[:-len(appendix_rules)] + appendix_rules[::-1] - break - else: - break - else: - hashcat_rules = hashcat_rules[:-len(appendix_rules)] + appendix_rules[::-1] - - #################################################################### - # Truncate left rules - last_precut = 0 - precut_rules = list() - for hashcat_rule in hashcat_rules: - if hashcat_rule[0] == "D" and self.hashcat_to_int(hashcat_rule[1]) == last_precut: - precut_rules.append("[") - elif len(precut_rules): - hashcat_rules = precut_rules[::-1] + hashcat_rules[len(precut_rules):] - break - else: - break - else: - hashcat_rules = precut_rules[::-1] + hashcat_rules[len(precut_rules):] - - #################################################################### - # Truncate right rules - last_postcut = len(password) - postcut_rules = list() - for hashcat_rule in hashcat_rules[::-1]: - - if hashcat_rule[0] == "D" and self.hashcat_to_int(hashcat_rule[1]) >= last_postcut: - postcut_rules.append("]") - elif len(postcut_rules): - hashcat_rules = hashcat_rules[:-len(postcut_rules)] + postcut_rules[::-1] - break - else: - break - else: - hashcat_rules = hashcat_rules[:-len(postcut_rules)] + postcut_rules[::-1] - - # Check if rules result in the correct password - if word_rules == password: - return hashcat_rules - else: - if self.debug: print - "[!] Advanced Processing FAILED: %s => %s => %s (%s)" % ( - word, " ".join(hashcat_rules), password, word_rules) - return None - - -def check_reversible_password(self, password): - """ Check whether the password is likely to be reversed successfuly. """ - - # Skip all numeric passwords - if password.isdigit(): - if self.verbose and not self.quiet: print - "[!] %s => {skipping numeric} => %s" % (password, password) - self.numeric_stats_total += 1 - return False - - # Skip passwords with less than 25% of alpha character - # TODO: Make random word detection more reliable based on word entropy. - elif len([c for c in password if c.isalpha()]) < len(password) / 4: - if self.verbose and not self.quiet: print - "[!] %s => {skipping alpha less than 25%%} => %s" % (password, password) - self.special_stats_total += 1 - return False - - # Only check english ascii passwords for now - # TODO: Add support for more languages. - elif [c for c in password if ord(c) < 32 or ord(c) > 126]: - if self.verbose and not self.quiet: print - "[!] 
%s => {skipping non ascii english} => %s" % (password, password) - self.foreign_stats_total += 1 - return False - - else: - return True - - -def analyze_password(self, password, rules_queue=multiprocessing.Queue(), words_queue=multiprocessing.Queue()): - """ Analyze a single password. """ - - if self.verbose: print - "[*] Analyzing password: %s" % password - - words = [] - - # Short-cut words in the dictionary - if self.enchant.check(password) and not self.word: - - word = dict() - word["password"] = password - word["suggestion"] = password - word["hashcat_rules"] = [[], ] - word["pre_rule"] = [] - word["best_rule_length"] = 9999 - - words.append(word) - - # Generate rules for words not in the dictionary - else: - - # Generate source words list - words = self.generate_words(password) - - # Generate levenshtein reverse paths for each suggestion - for word in words: - # Generate a collection of hashcat_rules lists - word["hashcat_rules"] = self.generate_hashcat_rules(word["suggestion"], word["password"]) - - self.print_hashcat_rules(words, password, rules_queue, words_queue) - - -def print_hashcat_rules(self, words, password, rules_queue, words_queue): - best_found_rule_length = 9999 - - # Sorted list based on rule length - for word in sorted(words, key=lambda word: len(word["hashcat_rules"][0])): - - words_queue.put(word["suggestion"]) - - for hashcat_rule in word["hashcat_rules"]: - - rule_length = len(hashcat_rule) - - if not self.more_rules: - if rule_length < best_found_rule_length: - best_found_rule_length = rule_length - - elif rule_length > best_found_rule_length: - if self.verbose: - print - "[-] %s => {best rule length exceeded: %d (%d)} => %s" % \ - (word["suggestion"], rule_length, best_found_rule_length, password) - break - - if rule_length <= self.max_rule_len: - - hashcat_rule_str = " ".join(hashcat_rule + word["pre_rule"] or [':']) - if self.verbose: print - "[+] %s => %s => %s" % (word["suggestion"], hashcat_rule_str, password) - - rules_queue.put(hashcat_rule_str) - - -def password_worker(self, i, passwords_queue, rules_queue, words_queue): - if self.debug: print - "[*] Password analysis worker [%d] started." % i - try: - while True: - password = passwords_queue.get() - - # Interrupted by a Death Pill - if password == None: break - - self.analyze_password(password, rules_queue, words_queue) - except (KeyboardInterrupt, SystemExit): - if self.debug: print - "[*] Password analysis worker [%d] terminated." % i - - if self.debug: print - "[*] Password analysis worker [%d] stopped." % i - - -def rule_worker(self, rules_queue, output_rules_filename): - """ Worker to store generated rules. """ - print - "[*] Saving rules to %s" % output_rules_filename - - f = open(output_rules_filename, 'w') - if self.debug: print - "[*] Rule worker started." - try: - while True: - rule = rules_queue.get() - - # Interrupted by a Death Pill - if rule == None: break - - f.write("%s\n" % rule) - f.flush() - - except (KeyboardInterrupt, SystemExit): - if self.debug: print - "[*] Rule worker terminated." - - f.close() - if self.debug: print - "[*] Rule worker stopped." - - -def word_worker(self, words_queue, output_words_filename): - """ Worker to store generated rules. """ - print - "[*] Saving words to %s" % output_words_filename - - f = open(output_words_filename, 'w') - if self.debug: print - "[*] Word worker started." 
- try: - while True: - word = words_queue.get() - - # Interrupted by a Death Pill - if word == None: break - - f.write("%s\n" % word) - f.flush() - - except (KeyboardInterrupt, SystemExit): - if self.debug: print - "[*] Word worker terminated." - - f.close() - if self.debug: print - "[*] Word worker stopped." - - -# Analyze passwords file -def analyze_passwords_file(self, passwords_file): - """ Analyze provided passwords file. """ - - print - "[*] Analyzing passwords file: %s:" % passwords_file - print - "[*] Press Ctrl-C to end execution and generate statistical analysis." - - # Setup queues - passwords_queue = multiprocessing.Queue(multiprocessing.cpu_count() * 100) - rules_queue = multiprocessing.Queue() - words_queue = multiprocessing.Queue() - - # Start workers - for i in range(multiprocessing.cpu_count()): - multiprocessing.Process(target=self.password_worker, - args=(i, passwords_queue, rules_queue, words_queue)).start() - multiprocessing.Process(target=self.rule_worker, args=(rules_queue, "%s.rule" % self.basename)).start() - multiprocessing.Process(target=self.word_worker, args=(words_queue, "%s.word" % self.basename)).start() - - # Continue with the main thread - - f = open(passwords_file, 'r') - - password_count = 0 - analysis_start = time.time() - segment_start = analysis_start - try: - for password in f: - password = password.rstrip('\r\n') - if len(password) > 0: - - # Provide analysis time feedback to the user - if not self.quiet and password_count != 0 and password_count % 5000 == 0: - segment_time = time.time() - segment_start - print - "[*] Processed %d passwords in %.2f seconds at the rate of %.2f p/sec" % \ - (password_count, segment_start - analysis_start, 5000 / segment_time) - segment_start = time.time() - - password_count += 1 - - # Perform preliminary checks and add password to the queue - if self.check_reversible_password(password): - passwords_queue.put(password) - - except (KeyboardInterrupt, SystemExit): - print - "\n[!] Rulegen was interrupted." - - else: - # Signal workers to stop. - for i in range(multiprocessing.cpu_count()): - passwords_queue.put(None) - - # Wait for all of the queued passwords to finish. - while not passwords_queue.empty(): - time.sleep(1) - - # Signal writers to stop. - rules_queue.put(None) - words_queue.put(None) - - f.close() - - analysis_time = time.time() - analysis_start - print - "[*] Finished processing %d passwords in %.2f seconds at the rate of %.2f p/sec" % ( - password_count, analysis_time, float(password_count) / analysis_time) - - print - "[*] Generating statistics for [%s] rules and words." % self.basename - print - "[-] Skipped %d all numeric passwords (%0.2f%%)" % \ - (self.numeric_stats_total, float(self.numeric_stats_total) * 100.0 / float(password_count)) - print - "[-] Skipped %d passwords with less than 25%% alpha characters (%0.2f%%)" % \ - (self.special_stats_total, float(self.special_stats_total) * 100.0 / float(password_count)) - print - "[-] Skipped %d passwords with non ascii characters (%0.2f%%)" % \ - (self.foreign_stats_total, float(self.foreign_stats_total) * 100.0 / float(password_count)) - - # TODO: Counter breaks on large files. uniq -c | sort -rn is still the most - # optimal way. 
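A note on the TODO above: collections.Counter keeps every distinct rule in memory, which is why the comment prefers the out-of-core sort | uniq -c | sort -rn pipeline for very large outputs. A rough sketch of what this frequency-sorting step computes (function and file names here are illustrative, not rulegen's exact code):

    # Count duplicate rule lines, write them out most-common-first, and
    # echo the top entries. Counter holds all distinct rules in memory,
    # which is the scaling limit the TODO above refers to.
    from collections import Counter

    def sort_by_frequency(in_path, out_path, top=10):
        with open(in_path) as f:
            counts = Counter(line.rstrip('\r\n') for line in f)
        total = sum(counts.values())
        with open(out_path, 'w') as out:
            for i, (rule, count) in enumerate(counts.most_common()):
                out.write(rule + '\n')
                if i < top:
                    print("[+] %s - %d (%0.2f%%)" %
                          (rule, count, count * 100.0 / total))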
- rules_file = open("%s.rule" % self.basename, 'r') - rules_sorted_file = open("%s-sorted.rule" % self.basename, 'w') - rules_counter = Counter(rules_file) - rule_counter_total = sum(rules_counter.values()) - - print - "\n[*] Top 10 rules" - rules_i = 0 - for (rule, count) in rules_counter.most_common(): - rules_sorted_file.write(rule) - if rules_i < 10: print - "[+] %s - %d (%0.2f%%)" % (rule.rstrip('\r\n'), count, count * 100 / rule_counter_total) - rules_i += 1 - - rules_file.close() - rules_sorted_file.close() - - words_file = open("%s.word" % self.basename, 'r') - words_sorted_file = open("%s-sorted.word" % self.basename, 'w') - words_counter = Counter(words_file) - word_counter_total = sum(rules_counter.values()) - - print - "\n[*] Top 10 words" - words_i = 0 - for (word, count) in words_counter.most_common(): - words_sorted_file.write(word) - if words_i < 10: print - "[+] %s - %d (%0.2f%%)" % (word.rstrip('\r\n'), count, count * 100 / word_counter_total) - words_i += 1 - - words_file.close() - words_sorted_file.close() - - -############################################################################ -def verify_hashcat_rules(self, word, rules, password): - f = open("%s/test.rule" % HASHCAT_PATH, 'w') - f.write(" ".join(rules)) - f.close() - - f = open("%s/test.word" % HASHCAT_PATH, 'w') - f.write(word) - f.close() - - p = subprocess.Popen(["%s/hashcat-cli64.bin" % HASHCAT_PATH, "-r", "%s/test.rule" % HASHCAT_PATH, "--stdout", - "%s/test.word" % HASHCAT_PATH], stdout=subprocess.PIPE) - out, err = p.communicate() - out = out.strip() - - if out == password: - hashcat_rules_str = " ".join(rules or [':']) - if self.verbose: print - "[+] %s => %s => %s" % (word, hashcat_rules_str, password) - - else: - print - "[!] Hashcat Verification FAILED: %s => %s => %s (%s)" % (word, " ".join(rules or [':']), password, out) - - -if __name__ == "__main__": - - header = " _ \n" - header += " RuleGen %s | |\n" % VERSION - header += " _ __ __ _ ___| | _\n" - header += " | '_ \ / _` |/ __| |/ /\n" - header += " | |_) | (_| | (__| < \n" - header += " | .__/ \__,_|\___|_|\_\\\n" - header += " | | \n" - header += " |_| iphelix@thesprawl.org\n" - header += "\n" - - parser = OptionParser("%prog [options] passwords.txt", version="%prog " + VERSION) - - parser.add_option("-b", "--basename", - help="Output base name. 
The following files will be generated: basename.words, basename.rules and basename.stats", - default="analysis", metavar="rockyou") - parser.add_option("-w", "--wordlist", help="Use a custom wordlist for rule analysis.", metavar="wiki.dict") - parser.add_option("-q", "--quiet", action="store_true", dest="quiet", default=False, help="Don't show headers.") - parser.add_option("--threads", type="int", default=10, help="Parallel threads to use for processing.") - - wordtune = OptionGroup(parser, "Fine tune source word generation:") - wordtune.add_option("--maxworddist", help="Maximum word edit distance (Levenshtein)", type="int", default=10, - metavar="10") - wordtune.add_option("--maxwords", help="Maximum number of source word candidates to consider", type="int", - default=5, metavar="5") - wordtune.add_option("--morewords", help="Consider suboptimal source word candidates", action="store_true", - default=False) - wordtune.add_option("--simplewords", help="Generate simple source words for given passwords", action="store_true", - default=False) - parser.add_option_group(wordtune) - - ruletune = OptionGroup(parser, "Fine tune rule generation:") - ruletune.add_option("--maxrulelen", help="Maximum number of operations in a single rule", type="int", default=10, - metavar="10") - ruletune.add_option("--maxrules", help="Maximum number of rules to consider", type="int", default=5, metavar="5") - ruletune.add_option("--morerules", help="Generate suboptimal rules", action="store_true", default=False) - ruletune.add_option("--simplerules", help="Generate simple rules insert,delete,replace", action="store_true", - default=False) - ruletune.add_option("--bruterules", help="Bruteforce reversal and rotation rules (slow)", action="store_true", - default=False) - parser.add_option_group(ruletune) - - spelltune = OptionGroup(parser, "Fine tune spell checker engine:") - spelltune.add_option("--providers", help="Comma-separated list of provider engines", default="aspell,myspell", - metavar="aspell,myspell") - parser.add_option_group(spelltune) - - debug = OptionGroup(parser, "Debuggin options:") - debug.add_option("-v", "--verbose", help="Show verbose information.", action="store_true", default=False) - debug.add_option("-d", "--debug", help="Debug rules.", action="store_true", default=False) - debug.add_option("--password", help="Process the last argument as a password not a file.", action="store_true", - default=False) - debug.add_option("--word", help="Use a custom word for rule analysis", metavar="Password") - debug.add_option("--hashcat", help="Test generated rules with hashcat-cli", action="store_true", default=False) - parser.add_option_group(debug) - - (options, args) = parser.parse_args() - - # Print program header - if not options.quiet: - print - header - - if len(args) < 1: - parser.error("no passwords file specified") - exit(1) - - rulegen = RuleGen(language="en", providers=options.providers, basename=options.basename, threads=options.threads) - - # Finetuning word generation - rulegen.max_word_dist = options.maxworddist - rulegen.max_words = options.maxwords - rulegen.more_words = options.morewords - rulegen.simple_words = options.simplewords - - # Finetuning rule generation - rulegen.max_rule_len = options.maxrulelen - rulegen.max_rules = options.maxrules - rulegen.more_rules = options.morerules - rulegen.simple_rules = options.simplerules - rulegen.brute_rules = options.bruterules - if rulegen.brute_rules: print - "[!] Bruteforcing reversal and rotation rules. 
(slower)" - - # Debugging options - rulegen.word = options.word - rulegen.verbose = options.verbose - rulegen.debug = options.debug - rulegen.hashcat = options.hashcat - rulegen.quiet = options.quiet - - # Custom wordlist - if not options.word: - if options.wordlist: rulegen.load_custom_wordlist(options.wordlist) - print - "[*] Using Enchant '%s' module. For best results please install" % rulegen.enchant.provider.name - print - " '%s' module language dictionaries." % rulegen.enchant.provider.name - - # Analyze a single password or several passwords in a file - if options.password: - rulegen.analyze_password(args[0]) - else: - rulegen.analyze_passwords_file(args[0]) diff --git a/PACK/statsgen.py b/PACK/statsgen.py old mode 100644 new mode 100755 index 17b9c0f..539cfa5 --- a/PACK/statsgen.py +++ b/PACK/statsgen.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # StatsGen - Password Statistical Analysis tool # # This tool is part of PACK (Password Analysis and Cracking Kit) @@ -11,7 +11,9 @@ # Please see the attached LICENSE file for additional licensing information. import sys -import re, operator, string +import re +import operator +import string from optparse import OptionParser, OptionGroup import time @@ -73,26 +75,30 @@ class StatsGen: if letter in string.digits: digit += 1 advancedmask_string += "?d" - if not simplemask or not simplemask[-1] == 'digit': simplemask.append('digit') + if not simplemask or not simplemask[-1] == 'digit': + simplemask.append('digit') - elif letter in string.lowercase: + elif letter in string.ascii_lowercase: lower += 1 advancedmask_string += "?l" - if not simplemask or not simplemask[-1] == 'string': simplemask.append('string') + if not simplemask or not simplemask[-1] == 'string': + simplemask.append('string') - - elif letter in string.uppercase: + elif letter in string.ascii_uppercase: upper += 1 advancedmask_string += "?u" - if not simplemask or not simplemask[-1] == 'string': simplemask.append('string') + if not simplemask or not simplemask[-1] == 'string': + simplemask.append('string') else: special += 1 advancedmask_string += "?s" - if not simplemask or not simplemask[-1] == 'special': simplemask.append('special') + if not simplemask or not simplemask[-1] == 'special': + simplemask.append('special') # String representation of masks - simplemask_string = ''.join(simplemask) if len(simplemask) <= 3 else 'othermask' + simplemask_string = ''.join(simplemask) if len( + simplemask) <= 3 else 'othermask' # Policy policy = (digit, lower, upper, special) @@ -136,106 +142,109 @@ class StatsGen: def generate_stats(self, filename): """ Generate password statistics. 
""" - f = open(filename, 'r') + with open(filename, 'r') as f: - for password in f: - password = password.rstrip('\r\n') + for password in f: + password = password.rstrip('\r\n') - if len(password) == 0: continue + if len(password) == 0: + continue - self.total_counter += 1 + self.total_counter += 1 - (pass_length, characterset, simplemask, advancedmask, policy) = self.analyze_password(password) - (digit, lower, upper, special) = policy + (pass_length, characterset, simplemask, advancedmask, + policy) = self.analyze_password(password) + (digit, lower, upper, special) = policy - if (self.charsets == None or characterset in self.charsets) and \ - (self.simplemasks == None or simplemask in self.simplemasks) and \ - (self.maxlength == None or pass_length <= self.maxlength) and \ - (self.minlength == None or pass_length >= self.minlength): + if (self.charsets == None or characterset in self.charsets) and \ + (self.simplemasks == None or simplemask in self.simplemasks) and \ + (self.maxlength == None or pass_length <= self.maxlength) and \ + (self.minlength == None or pass_length >= self.minlength): - self.filter_counter += 1 + self.filter_counter += 1 - if self.mindigit == None or digit < self.mindigit: self.mindigit = digit - if self.maxdigit == None or digit > self.maxdigit: self.maxdigit = digit + if self.mindigit == None or digit < self.mindigit: + self.mindigit = digit + if self.maxdigit == None or digit > self.maxdigit: + self.maxdigit = digit - if self.minupper == None or upper < self.minupper: self.minupper = upper - if self.maxupper == None or upper > self.maxupper: self.maxupper = upper + if self.minupper == None or upper < self.minupper: + self.minupper = upper + if self.maxupper == None or upper > self.maxupper: + self.maxupper = upper - if self.minlower == None or lower < self.minlower: self.minlower = lower - if self.maxlower == None or lower > self.maxlower: self.maxlower = lower + if self.minlower == None or lower < self.minlower: + self.minlower = lower + if self.maxlower == None or lower > self.maxlower: + self.maxlower = lower - if self.minspecial == None or special < self.minspecial: self.minspecial = special - if self.maxspecial == None or special > self.maxspecial: self.maxspecial = special + if self.minspecial == None or special < self.minspecial: + self.minspecial = special + if self.maxspecial == None or special > self.maxspecial: + self.maxspecial = special - if pass_length in self.stats_length: - self.stats_length[pass_length] += 1 - else: - self.stats_length[pass_length] = 1 + if pass_length in self.stats_length: + self.stats_length[pass_length] += 1 + else: + self.stats_length[pass_length] = 1 - if characterset in self.stats_charactersets: - self.stats_charactersets[characterset] += 1 - else: - self.stats_charactersets[characterset] = 1 + if characterset in self.stats_charactersets: + self.stats_charactersets[characterset] += 1 + else: + self.stats_charactersets[characterset] = 1 - if simplemask in self.stats_simplemasks: - self.stats_simplemasks[simplemask] += 1 - else: - self.stats_simplemasks[simplemask] = 1 + if simplemask in self.stats_simplemasks: + self.stats_simplemasks[simplemask] += 1 + else: + self.stats_simplemasks[simplemask] = 1 - if advancedmask in self.stats_advancedmasks: - self.stats_advancedmasks[advancedmask] += 1 - else: - self.stats_advancedmasks[advancedmask] = 1 - - f.close() + if advancedmask in self.stats_advancedmasks: + self.stats_advancedmasks[advancedmask] += 1 + else: + self.stats_advancedmasks[advancedmask] = 1 def print_stats(self): 
""" Print password statistics. """ - print - "[+] Analyzing %d%% (%d/%d) of passwords" % ( - self.filter_counter * 100 / self.total_counter, self.filter_counter, self.total_counter) - print - " NOTE: Statistics below is relative to the number of analyzed passwords, not total number of passwords" - print - "\n[*] Length:" - for (length, count) in sorted(self.stats_length.iteritems(), key=operator.itemgetter(1), reverse=True): - if self.hiderare and not count * 100 / self.filter_counter > 0: continue - print - "[+] %25d: %02d%% (%d)" % (length, count * 100 / self.filter_counter, count) + print("[+] Analyzing %d%% (%d/%d) of passwords" % (self.filter_counter * + 100/self.total_counter, self.filter_counter, self.total_counter)) + print(" NOTE: Statistics below is relative to the number of analyzed passwords, not total number of passwords") + print("\n[*] Length:") + for (length, count) in sorted(iter(self.stats_length.items()), key=operator.itemgetter(1), reverse=True): + if self.hiderare and not count*100/self.filter_counter > 0: + continue + print("[+] %25d: %02d%% (%d)" % + (length, count*100/self.filter_counter, count)) - print - "\n[*] Character-set:" - for (char, count) in sorted(self.stats_charactersets.iteritems(), key=operator.itemgetter(1), reverse=True): - if self.hiderare and not count * 100 / self.filter_counter > 0: continue - print - "[+] %25s: %02d%% (%d)" % (char, count * 100 / self.filter_counter, count) + print("\n[*] Character-set:") + for (char, count) in sorted(iter(self.stats_charactersets.items()), key=operator.itemgetter(1), reverse=True): + if self.hiderare and not count*100/self.filter_counter > 0: + continue + print("[+] %25s: %02d%% (%d)" % + (char, count*100/self.filter_counter, count)) - print - "\n[*] Password complexity:" - print - "[+] digit: min(%s) max(%s)" % (self.mindigit, self.maxdigit) - print - "[+] lower: min(%s) max(%s)" % (self.minlower, self.maxlower) - print - "[+] upper: min(%s) max(%s)" % (self.minupper, self.maxupper) - print - "[+] special: min(%s) max(%s)" % (self.minspecial, self.maxspecial) + print("\n[*] Password complexity:") + print("[+] digit: min(%s) max(%s)" % + (self.mindigit, self.maxdigit)) + print("[+] lower: min(%s) max(%s)" % + (self.minlower, self.maxlower)) + print("[+] upper: min(%s) max(%s)" % + (self.minupper, self.maxupper)) + print("[+] special: min(%s) max(%s)" % + (self.minspecial, self.maxspecial)) - print - "\n[*] Simple Masks:" - for (simplemask, count) in sorted(self.stats_simplemasks.iteritems(), key=operator.itemgetter(1), reverse=True): - if self.hiderare and not count * 100 / self.filter_counter > 0: continue - print - "[+] %25s: %02d%% (%d)" % (simplemask, count * 100 / self.filter_counter, count) + print("\n[*] Simple Masks:") + for (simplemask, count) in sorted(iter(self.stats_simplemasks.items()), key=operator.itemgetter(1), reverse=True): + if self.hiderare and not count*100/self.filter_counter > 0: + continue + print("[+] %25s: %02d%% (%d)" % + (simplemask, count*100/self.filter_counter, count)) - print - "\n[*] Advanced Masks:" - for (advancedmask, count) in sorted(self.stats_advancedmasks.iteritems(), key=operator.itemgetter(1), - reverse=True): - if count * 100 / self.filter_counter > 0: - print - "[+] %25s: %02d%% (%d)" % (advancedmask, count * 100 / self.filter_counter, count) + print("\n[*] Advanced Masks:") + for (advancedmask, count) in sorted(iter(self.stats_advancedmasks.items()), key=operator.itemgetter(1), reverse=True): + if count*100/self.filter_counter > 0: + print("[+] %25s: %02d%% (%d)" % + 
@@ -253,49 +262,57 @@ if __name__ == "__main__":
     header += " |_| iphelix@thesprawl.org\n"
     header += "\n"

-    parser = OptionParser("%prog [options] passwords.txt\n\nType --help for more options", version="%prog " + VERSION)
+    parser = OptionParser(
+        "%prog [options] passwords.txt\n\nType --help for more options", version="%prog " + VERSION)

     filters = OptionGroup(parser, "Password Filters")
-    filters.add_option("--minlength", dest="minlength", type="int", metavar="8", help="Minimum password length")
-    filters.add_option("--maxlength", dest="maxlength", type="int", metavar="8", help="Maximum password length")
-    filters.add_option("--charset", dest="charsets", help="Password charset filter (comma separated)",
-                       metavar="loweralpha,numeric")
-    filters.add_option("--simplemask", dest="simplemasks", help="Password mask filter (comma separated)",
-                       metavar="stringdigit,allspecial")
+    filters.add_option("--minlength", dest="minlength",
+                       type="int", metavar="8", help="Minimum password length")
+    filters.add_option("--maxlength", dest="maxlength",
+                       type="int", metavar="8", help="Maximum password length")
+    filters.add_option("--charset", dest="charsets",
+                       help="Password charset filter (comma separated)", metavar="loweralpha,numeric")
+    filters.add_option("--simplemask", dest="simplemasks",
+                       help="Password mask filter (comma separated)", metavar="stringdigit,allspecial")
     parser.add_option_group(filters)

-    parser.add_option("-o", "--output", dest="output_file", help="Save masks and stats to a file",
-                      metavar="password.masks")
-    parser.add_option("--hiderare", action="store_true", dest="hiderare", default=False,
-                      help="Hide statistics covering less than 1% of the sample")
+    parser.add_option("-o", "--output", dest="output_file",
+                      help="Save masks and stats to a file", metavar="password.masks")
+    parser.add_option("--hiderare", action="store_true", dest="hiderare",
+                      default=False, help="Hide statistics covering less than 1% of the sample")

-    parser.add_option("-q", "--quiet", action="store_true", dest="quiet", default=False, help="Don't show headers.")
+    parser.add_option("-q", "--quiet", action="store_true",
+                      dest="quiet", default=False, help="Don't show headers.")
     (options, args) = parser.parse_args()

     # Print program header
     if not options.quiet:
-        print
-        header
+        print(header)

     if len(args) != 1:
         parser.error("no passwords file specified")
         exit(1)

-    print
-    "[*] Analyzing passwords in [%s]" % args[0]
+    print("[*] Analyzing passwords in [%s]" % args[0])

     statsgen = StatsGen()

-    if not options.minlength == None: statsgen.minlength = options.minlength
-    if not options.maxlength == None: statsgen.maxlength = options.maxlength
-    if not options.charsets == None: statsgen.charsets = [x.strip() for x in options.charsets.split(',')]
-    if not options.simplemasks == None: statsgen.simplemasks = [x.strip() for x in options.simplemasks.split(',')]
+    if options.minlength is not None:
+        statsgen.minlength = options.minlength
+    if options.maxlength is not None:
+        statsgen.maxlength = options.maxlength
+    if options.charsets is not None:
+        statsgen.charsets = [x.strip() for x in options.charsets.split(',')]
+    if options.simplemasks is not None:
+        statsgen.simplemasks = [x.strip()
+                                for x in options.simplemasks.split(',')]

-    if options.hiderare: statsgen.hiderare = options.hiderare
+    if options.hiderare:
+        statsgen.hiderare = options.hiderare
     if options.output_file:
-        print
-        "[*] Saving advanced masks and occurrences to [%s]" % options.output_file
+        print("[*] Saving advanced masks and occurrences to [%s]" %
+              options.output_file)
         statsgen.output_file = open(options.output_file, 'w')

     statsgen.generate_stats(args[0])
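
The __main__ block is only a thin wrapper: the same run can be reproduced programmatically, since every CLI option maps to a plain attribute on StatsGen. A sketch, with rockyou.txt standing in for any wordlist path:

statsgen = StatsGen()
statsgen.minlength = 8                         # same effect as --minlength 8
statsgen.charsets = ["loweralpha", "numeric"]  # same effect as --charset
statsgen.hiderare = True                       # same effect as --hiderare
statsgen.generate_stats("rockyou.txt")         # placeholder wordlist path
statsgen.print_stats()
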