mirror of
https://github.com/trustedsec/hate_crack.git
synced 2025-12-12 07:40:27 -08:00
355 lines
11 KiB
Python
355 lines
11 KiB
Python
# pyenchant
|
|
#
|
|
# Copyright (C) 2004-2008 Ryan Kelly
|
|
#
|
|
# This library is free software; you can redistribute it and/or
|
|
# modify it under the terms of the GNU Lesser General Public
|
|
# License as published by the Free Software Foundation; either
|
|
# version 2.1 of the License, or (at your option) any later version.
|
|
#
|
|
# This library is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
# Lesser General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Lesser General Public
|
|
# License along with this library; if not, write to the
|
|
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
# Boston, MA 02111-1307, USA.
|
|
#
|
|
# In addition, as a special exception, you are
|
|
# given permission to link the code of this program with
|
|
# non-LGPL Spelling Provider libraries (eg: a MSFT Office
|
|
# spell checker backend) and distribute linked combinations including
|
|
# the two. You must obey the GNU Lesser General Public License in all
|
|
# respects for all of the code used other than said providers. If you modify
|
|
# this file, you may extend this exception to your version of the
|
|
# file, but you are not obligated to do so. If you do not wish to
|
|
# do so, delete this exception statement from your version.
|
|
#
|
|
"""
|
|
|
|
enchant.utils: Misc utilities for the enchant package
|
|
========================================================
|
|
|
|
This module provides miscellaneous utilities for use with the
|
|
enchant spellchecking package. Currently available functionality
|
|
includes:
|
|
|
|
* string/unicode compatibility wrappers
|
|
* functions for dealing with locale/language settings
|
|
* ability to list supporting data files (win32 only)
|
|
* functions for bundling supporting data files from a build
|
|
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import codecs
|
|
|
|
from enchant.errors import *
|
|
|
|
# Attempt to access local language information
|
|
try:
|
|
import locale
|
|
except ImportError:
|
|
locale = None
|
|
|
|
#
|
|
# Unicode/Bytes compatabilty wrappers.
|
|
#
|
|
# These allow us to support both Python 2.x and Python 3.x from
|
|
# the same codebase.
|
|
#
|
|
# We provide explicit type objects "bytes" and "unicode" that can be
|
|
# used to construct instances of the appropriate type. The class
|
|
# "EnchantStr" derives from the default "str" type and implements the
|
|
# necessary logic for encoding/decoding as strings are passed into
|
|
# the underlying C library (where they must always be utf-8 encoded
|
|
# byte strings).
|
|
#
|
|
|
|
try:
|
|
unicode = unicode
|
|
except NameError:
|
|
str = str
|
|
unicode = str
|
|
bytes = bytes
|
|
basestring = (str, bytes)
|
|
else:
|
|
str = str
|
|
unicode = unicode
|
|
bytes = str
|
|
basestring = basestring
|
|
|
|
|
|
def raw_unicode(raw):
|
|
"""Make a unicode string from a raw string.
|
|
|
|
This function takes a string containing unicode escape characters,
|
|
and returns the corresponding unicode string. Useful for writing
|
|
unicode string literals in your python source while being upwards-
|
|
compatible with Python 3. For example, instead of doing this:
|
|
|
|
s = u"hello\u2149" # syntax error in Python 3
|
|
|
|
Or this:
|
|
|
|
s = "hello\u2149" # not what you want in Python 2.x
|
|
|
|
You can do this:
|
|
|
|
s = raw_unicode(r"hello\u2149") # works everywhere!
|
|
|
|
"""
|
|
return raw.encode("utf8").decode("unicode-escape")
|
|
|
|
|
|
def raw_bytes(raw):
|
|
"""Make a bytes object out of a raw string.
|
|
|
|
This is analogous to raw_unicode, but processes byte escape characters
|
|
to produce a bytes object.
|
|
"""
|
|
return codecs.escape_decode(raw)[0]
|
|
|
|
|
|
class EnchantStr(str):
|
|
"""String subclass for interfacing with enchant C library.
|
|
|
|
This class encapsulates the logic for interfacing between python native
|
|
string/unicode objects and the underlying enchant library, which expects
|
|
all strings to be UTF-8 character arrays. It is a subclass of the
|
|
default string class 'str' - on Python 2.x that makes it an ascii string,
|
|
on Python 3.x it is a unicode object.
|
|
|
|
Initialise it with a string or unicode object, and use the encode() method
|
|
to obtain an object suitable for passing to the underlying C library.
|
|
When strings are read back into python, use decode(s) to translate them
|
|
back into the appropriate python-level string type.
|
|
|
|
This allows us to following the common Python 2.x idiom of returning
|
|
unicode when unicode is passed in, and byte strings otherwise. It also
|
|
lets the interface be upwards-compatible with Python 3, in which string
|
|
objects are unicode by default.
|
|
"""
|
|
|
|
def __new__(cls, value):
|
|
"""EnchantStr data constructor.
|
|
|
|
This method records whether the initial string was unicode, then
|
|
simply passes it along to the default string constructor.
|
|
"""
|
|
if type(value) is unicode:
|
|
was_unicode = True
|
|
if str is not unicode:
|
|
value = value.encode("utf-8")
|
|
else:
|
|
was_unicode = False
|
|
if str is not bytes:
|
|
raise Error("Don't pass bytestrings to pyenchant")
|
|
self = str.__new__(cls, value)
|
|
self._was_unicode = was_unicode
|
|
return self
|
|
|
|
def encode(self):
|
|
"""Encode this string into a form usable by the enchant C library."""
|
|
if str is unicode:
|
|
return str.encode(self, "utf-8")
|
|
else:
|
|
return self
|
|
|
|
def decode(self, value):
|
|
"""Decode a string returned by the enchant C library."""
|
|
if self._was_unicode:
|
|
if str is unicode:
|
|
# On some python3 versions, ctypes converts c_char_p
|
|
# to str() rather than bytes()
|
|
if isinstance(value, str):
|
|
value = value.encode()
|
|
return value.decode("utf-8")
|
|
else:
|
|
return value.decode("utf-8")
|
|
else:
|
|
return value
|
|
|
|
|
|
def printf(values, sep=" ", end="\n", file=None):
|
|
"""Compatability wrapper from print statement/function.
|
|
|
|
This function is a simple Python2/Python3 compatability wrapper
|
|
for printing to stdout.
|
|
"""
|
|
if file is None:
|
|
file = sys.stdout
|
|
file.write(sep.join(map(str, values)))
|
|
file.write(end)
|
|
|
|
|
|
try:
|
|
next = next
|
|
except NameError:
|
|
def next(iter):
|
|
"""Compatability wrapper for advancing an iterator."""
|
|
return iter.next()
|
|
|
|
try:
|
|
xrange = xrange
|
|
except NameError:
|
|
xrange = range
|
|
|
|
|
|
#
|
|
# Other useful functions.
|
|
#
|
|
|
|
|
|
def levenshtein(s1, s2):
|
|
"""Calculate the Levenshtein distance between two strings.
|
|
|
|
This is straight from Wikipedia.
|
|
"""
|
|
if len(s1) < len(s2):
|
|
return levenshtein(s2, s1)
|
|
if not s1:
|
|
return len(s2)
|
|
|
|
previous_row = xrange(len(s2) + 1)
|
|
for i, c1 in enumerate(s1):
|
|
current_row = [i + 1]
|
|
for j, c2 in enumerate(s2):
|
|
insertions = previous_row[j + 1] + 1
|
|
deletions = current_row[j] + 1
|
|
substitutions = previous_row[j] + (c1 != c2)
|
|
current_row.append(min(insertions, deletions, substitutions))
|
|
previous_row = current_row
|
|
|
|
return previous_row[-1]
|
|
|
|
|
|
def trim_suggestions(word, suggs, maxlen, calcdist=None):
|
|
"""Trim a list of suggestions to a maximum length.
|
|
|
|
If the list of suggested words is too long, you can use this function
|
|
to trim it down to a maximum length. It tries to keep the "best"
|
|
suggestions based on similarity to the original word.
|
|
|
|
If the optional "calcdist" argument is provided, it must be a callable
|
|
taking two words and returning the distance between them. It will be
|
|
used to determine which words to retain in the list. The default is
|
|
a simple Levenshtein distance.
|
|
"""
|
|
if calcdist is None:
|
|
calcdist = levenshtein
|
|
decorated = [(calcdist(word, s), s) for s in suggs]
|
|
decorated.sort()
|
|
return [s for (l, s) in decorated[:maxlen]]
|
|
|
|
|
|
def get_default_language(default=None):
|
|
"""Determine the user's default language, if possible.
|
|
|
|
This function uses the 'locale' module to try to determine
|
|
the user's preferred language. The return value is as
|
|
follows:
|
|
|
|
* if a locale is available for the LC_MESSAGES category,
|
|
that language is used
|
|
* if a default locale is available, that language is used
|
|
* if the keyword argument <default> is given, it is used
|
|
* if nothing else works, None is returned
|
|
|
|
Note that determining the user's language is in general only
|
|
possible if they have set the necessary environment variables
|
|
on their system.
|
|
"""
|
|
try:
|
|
import locale
|
|
tag = locale.getlocale()[0]
|
|
if tag is None:
|
|
tag = locale.getdefaultlocale()[0]
|
|
if tag is None:
|
|
raise Error("No default language available")
|
|
return tag
|
|
except Exception:
|
|
pass
|
|
return default
|
|
|
|
|
|
get_default_language._DOC_ERRORS = ["LC"]
|
|
|
|
|
|
def get_resource_filename(resname):
|
|
"""Get the absolute path to the named resource file.
|
|
|
|
This serves widely the same purpose as pkg_resources.resource_filename(),
|
|
but tries to avoid loading pkg_resources unless we're actually in
|
|
an egg.
|
|
"""
|
|
path = os.path.dirname(os.path.abspath(__file__))
|
|
path = os.path.join(path, resname)
|
|
if os.path.exists(path):
|
|
return path
|
|
if hasattr(sys, "frozen"):
|
|
exe_path = unicode(sys.executable, sys.getfilesystemencoding())
|
|
exe_dir = os.path.dirname(exe_path)
|
|
path = os.path.join(exe_dir, resname)
|
|
if os.path.exists(path):
|
|
return path
|
|
else:
|
|
import pkg_resources
|
|
try:
|
|
path = pkg_resources.resource_filename("enchant", resname)
|
|
except KeyError:
|
|
pass
|
|
else:
|
|
path = os.path.abspath(path)
|
|
if os.path.exists(path):
|
|
return path
|
|
raise Error("Could not locate resource '%s'" % (resname,))
|
|
|
|
|
|
def win32_data_files():
|
|
"""Get list of supporting data files, for use with setup.py
|
|
|
|
This function returns a list of the supporting data files available
|
|
to the running version of PyEnchant. This is in the format expected
|
|
by the data_files argument of the distutils setup function. It's
|
|
very useful, for example, for including the data files in an executable
|
|
produced by py2exe.
|
|
|
|
Only really tested on the win32 platform (it's the only platform for
|
|
which we ship our own supporting data files)
|
|
"""
|
|
# Include the main enchant DLL
|
|
try:
|
|
libEnchant = get_resource_filename("libenchant.dll")
|
|
except Error:
|
|
libEnchant = get_resource_filename("libenchant-1.dll")
|
|
mainDir = os.path.dirname(libEnchant)
|
|
dataFiles = [('', [libEnchant])]
|
|
# And some specific supporting DLLs
|
|
for dll in os.listdir(mainDir):
|
|
if not dll.endswith(".dll"):
|
|
continue
|
|
for prefix in ("iconv", "intl", "libglib", "libgmodule"):
|
|
if dll.startswith(prefix):
|
|
break
|
|
else:
|
|
continue
|
|
dataFiles[0][1].append(os.path.join(mainDir, dll))
|
|
# And anything found in the supporting data directories
|
|
dataDirs = ("share/enchant/myspell", "share/enchant/ispell", "lib/enchant")
|
|
for dataDir in dataDirs:
|
|
files = []
|
|
fullDir = os.path.join(mainDir, os.path.normpath(dataDir))
|
|
for fn in os.listdir(fullDir):
|
|
fullFn = os.path.join(fullDir, fn)
|
|
if os.path.isfile(fullFn):
|
|
files.append(fullFn)
|
|
dataFiles.append((dataDir, files))
|
|
return dataFiles
|
|
|
|
|
|
win32_data_files._DOC_ERRORS = ["py", "py", "exe"]
|