mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 15:49:46 -08:00
Compare commits
2 Commits
dependabot
...
push-trmuz
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
02b59301dd | ||
|
|
a8e52615a3 |
@@ -4,6 +4,8 @@
|
||||
|
||||
### New Features
|
||||
|
||||
- add IDA v9.0 backend via idalib #2376 @williballenthin
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
### New Rules (0)
|
||||
|
||||
@@ -20,9 +20,9 @@ from importlib import util
|
||||
spec = util.find_spec('binaryninja')
|
||||
if spec is not None:
|
||||
if len(spec.submodule_search_locations) > 0:
|
||||
path = Path(spec.submodule_search_locations[0])
|
||||
# encode the path with utf8 then convert to hex, make sure it can be read and restored properly
|
||||
print(str(path.parent).encode('utf8').hex())
|
||||
path = Path(spec.submodule_search_locations[0])
|
||||
# encode the path with utf8 then convert to hex, make sure it can be read and restored properly
|
||||
print(str(path.parent).encode('utf8').hex())
|
||||
"""
|
||||
|
||||
|
||||
|
||||
@@ -32,7 +32,9 @@ class IdaFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
hashes=SampleHashes(
|
||||
md5=ida_nalt.retrieve_input_file_md5(), sha1="(unknown)", sha256=ida_nalt.retrieve_input_file_sha256()
|
||||
md5=ida_nalt.retrieve_input_file_md5().hex(),
|
||||
sha1="(unknown)",
|
||||
sha256=ida_nalt.retrieve_input_file_sha256().hex(),
|
||||
)
|
||||
)
|
||||
self.global_features: List[Tuple[Feature, Address]] = []
|
||||
|
||||
113
capa/features/extractors/ida/idalib.py
Normal file
113
capa/features/extractors/ida/idalib.py
Normal file
@@ -0,0 +1,113 @@
|
||||
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import logging
|
||||
import importlib.util
|
||||
from typing import Optional
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def is_idalib_installed() -> bool:
    """Report whether the `ida` module can be located on the current import path.

    Only the import machinery is queried; the module itself is not imported.
    """
    try:
        spec = importlib.util.find_spec("ida")
    except ModuleNotFoundError:
        return False
    return spec is not None
|
||||
|
||||
|
||||
def get_idalib_user_config_path() -> Optional[Path]:
    """Get the path to the user's config file based on platform following IDA's user directories.

    Returns:
        The path to `ida-config.json` inside the per-user IDA directory,
        or None when that file does not exist or, on Windows, when the
        APPDATA environment variable is not set.
    """
    # derived from `py-activate-idalib.py` from IDA v9.0 Beta 4

    if sys.platform == "win32":
        # On Windows, use the %APPDATA%\Hex-Rays\IDA Pro directory.
        # APPDATA may be missing (e.g. stripped-down service environments);
        # without it there is no user directory to inspect.
        appdata = os.getenv("APPDATA")
        if not appdata:
            return None
        config_dir = Path(appdata) / "Hex-Rays" / "IDA Pro"
    else:
        # On macOS and Linux, use ~/.idapro
        config_dir = Path.home() / ".idapro"

    # Return the full path to the config file (now in JSON format)
    user_config_path = config_dir / "ida-config.json"
    if not user_config_path.exists():
        return None
    return user_config_path
|
||||
|
||||
|
||||
def find_idalib() -> Optional[Path]:
    """Locate the idalib Python package directory of the IDA installation named in the user config.

    The installation directory is read from the `Paths` / `ida-install-dir`
    entry of the user's `ida-config.json`. The directory is accepted only if it
    contains `ida.hlp`, the platform's idalib shared library, and the
    `idalib/python/ida/__init__.py` package file.

    Returns:
        The `idalib/python` directory (suitable for adding to `sys.path`),
        or None if the config is missing/unreadable/malformed, the platform
        is not recognized, or any of the expected files is absent.
    """
    config_path = get_idalib_user_config_path()
    if not config_path:
        return None

    try:
        config = json.loads(config_path.read_text(encoding="utf-8"))
        ida_install_dir = Path(config["Paths"]["ida-install-dir"])
    except (OSError, ValueError, KeyError, TypeError):
        # unreadable file, invalid JSON/encoding, missing keys,
        # or a config whose shape is not the expected nested mapping.
        return None

    if not ida_install_dir.exists():
        return None

    # use .get() so that an unlisted platform means "not found" rather than a KeyError.
    libname = {
        "win32": "idalib.dll",
        "linux": "libidalib.so",
        "linux2": "libidalib.so",
        "darwin": "libidalib.dylib",
    }.get(sys.platform)
    if libname is None:
        return None

    if not (ida_install_dir / "ida.hlp").is_file():
        return None

    if not (ida_install_dir / libname).is_file():
        return None

    idalib_path = ida_install_dir / "idalib" / "python"
    if not idalib_path.exists():
        return None

    if not (idalib_path / "ida" / "__init__.py").is_file():
        return None

    return idalib_path
|
||||
|
||||
|
||||
def has_idalib() -> bool:
    """Report whether idalib is available, either already importable or discoverable on disk.

    Returns:
        True if the `ida` module is importable as-is or an IDA installation
        providing idalib was found; False otherwise.
    """
    if is_idalib_installed():
        logger.debug("found installed IDA idalib API")
        return True

    logger.debug("IDA idalib API not installed, searching...")

    idalib_path = find_idalib()
    if not idalib_path:
        logger.debug("failed to find IDA idalib installation")
        # return here so we don't also log a misleading "found ...: None" message.
        return False

    logger.debug("found IDA idalib API: %s", idalib_path)
    return True
|
||||
|
||||
|
||||
def load_idalib() -> bool:
    """Import the `ida` module, extending `sys.path` with a discovered installation if needed.

    Returns:
        True if `ida` could be imported, either directly or after appending
        the directory reported by `find_idalib()` to `sys.path`; False otherwise.
    """
    try:
        import ida  # noqa: F401 unused import
    except ImportError:
        pass
    else:
        return True

    # not importable as-is: look for an installation and retry from there.
    idalib_path = find_idalib()
    if not idalib_path:
        return False

    sys.path.append(idalib_path.absolute().as_posix())
    try:
        import ida  # noqa: F401 unused import
    except ImportError:
        return False
    return True
|
||||
@@ -6,9 +6,12 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import os
|
||||
import io
|
||||
import sys
|
||||
import gzip
|
||||
import ctypes
|
||||
import inspect
|
||||
import tempfile
|
||||
import logging
|
||||
import contextlib
|
||||
import importlib.util
|
||||
@@ -81,6 +84,47 @@ def assert_never(value) -> NoReturn:
|
||||
assert False, f"Unhandled value: {value} ({type(value).__name__})" # noqa: B011
|
||||
|
||||
|
||||
# Redirect stdout at the C runtime level,
# which lets us handle native libraries that spam stdout.
# via: https://eli.thegreenplace.net/2015/redirecting-all-kinds-of-stdout-in-python/
#
# Resolve the C runtime handles defensively: loading the current process via
# `ctypes.CDLL(None)` and looking up a `stdout` symbol can fail on some
# platforms, and such a failure must not prevent this module from importing.
# When unavailable, both names are None and `stdout_redirector` is a no-op.
try:
    LIBC = ctypes.CDLL(None)
    C_STDOUT = ctypes.c_void_p.in_dll(LIBC, "stdout")
except (OSError, TypeError, ValueError, AttributeError):
    LIBC = None
    C_STDOUT = None


@contextlib.contextmanager
def stdout_redirector(stream):
    """Capture everything written to stdout (including by native code) while the context is active.

    stdout's file descriptor is pointed at a temporary file for the duration
    of the `with` body. On normal exit the captured bytes are written to `stream`.
    stdout is restored even when the body raises; in that case the captured
    output is discarded and the exception propagates.

    If the C runtime handles could not be resolved at import time, no
    redirection happens and the body runs with stdout untouched.

    Args:
        stream: binary writable object (e.g. `io.BytesIO`) that receives the captured bytes.
    """
    if LIBC is None:
        # cannot flush the C-level buffer here, so don't attempt to redirect.
        yield
        return

    # The original fd stdout points to. Usually 1 on POSIX systems.
    original_stdout_fd = sys.stdout.fileno()

    def _redirect_stdout(to_fd):
        """Redirect stdout to the given file descriptor."""
        # Flush the C-level buffer stdout
        LIBC.fflush(C_STDOUT)
        # Flush and close sys.stdout - also closes the file descriptor (fd)
        sys.stdout.close()
        # Make original_stdout_fd point to the same file as to_fd
        os.dup2(to_fd, original_stdout_fd)
        # Create a new sys.stdout that points to the redirected fd
        sys.stdout = io.TextIOWrapper(os.fdopen(original_stdout_fd, "wb"))

    # Save a copy of the original stdout fd in saved_stdout_fd
    saved_stdout_fd = os.dup(original_stdout_fd)
    try:
        # Create a temporary file and redirect stdout to it
        with tempfile.TemporaryFile(mode="w+b") as tfile:
            _redirect_stdout(tfile.fileno())
            try:
                # Yield to caller
                yield
            finally:
                # Always redirect stdout back to the saved fd, even if the
                # caller raised; otherwise stdout stays attached to the temp file.
                _redirect_stdout(saved_stdout_fd)
            # Copy contents of temporary file to the given stream
            tfile.flush()
            tfile.seek(0, io.SEEK_SET)
            stream.write(tfile.read())
    finally:
        os.close(saved_stdout_fd)
|
||||
|
||||
|
||||
def load_json_from_path(json_path: Path):
|
||||
with gzip.open(json_path, "r") as compressed_report:
|
||||
try:
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
@@ -69,6 +70,7 @@ BACKEND_DRAKVUF = "drakvuf"
|
||||
BACKEND_VMRAY = "vmray"
|
||||
BACKEND_FREEZE = "freeze"
|
||||
BACKEND_BINEXPORT2 = "binexport2"
|
||||
BACKEND_IDA = "ida"
|
||||
|
||||
|
||||
class CorruptFile(ValueError):
|
||||
@@ -321,6 +323,36 @@ def get_extractor(
|
||||
|
||||
return capa.features.extractors.binexport2.extractor.BinExport2FeatureExtractor(be2, buf)
|
||||
|
||||
elif backend == BACKEND_IDA:
|
||||
import capa.features.extractors.ida.idalib as idalib
|
||||
|
||||
if not idalib.has_idalib():
|
||||
raise RuntimeError(
|
||||
# TODO(williballenthin): add more details here
|
||||
"cannot find IDA idalib module."
|
||||
)
|
||||
|
||||
if not idalib.load_idalib():
|
||||
raise RuntimeError("failed to load IDA idalib module.")
|
||||
|
||||
import ida
|
||||
import ida_auto
|
||||
|
||||
import capa.features.extractors.ida.extractor
|
||||
|
||||
logger.debug("idalib: opening database...")
|
||||
# idalib writes to stdout (ugh), so we have to capture that
|
||||
# so as not to screw up structured output.
|
||||
with capa.helpers.stdout_redirector(io.BytesIO()):
|
||||
if ida.open_database(str(input_path), run_auto_analysis=True):
|
||||
raise RuntimeError("failed to analyze input file")
|
||||
|
||||
logger.debug("idalib: waiting for analysis...")
|
||||
ida_auto.auto_wait()
|
||||
logger.debug("idalib: opened database.")
|
||||
|
||||
return capa.features.extractors.ida.extractor.IdaFeatureExtractor()
|
||||
|
||||
else:
|
||||
raise ValueError("unexpected backend: " + backend)
|
||||
|
||||
|
||||
@@ -43,6 +43,7 @@ import capa.features.extractors.common
|
||||
from capa.rules import RuleSet
|
||||
from capa.engine import MatchResults
|
||||
from capa.loader import (
|
||||
BACKEND_IDA,
|
||||
BACKEND_VIV,
|
||||
BACKEND_CAPE,
|
||||
BACKEND_BINJA,
|
||||
@@ -283,6 +284,7 @@ def install_common_args(parser, wanted=None):
|
||||
backends = [
|
||||
(BACKEND_AUTO, "(default) detect appropriate backend automatically"),
|
||||
(BACKEND_VIV, "vivisect"),
|
||||
(BACKEND_IDA, "IDA via idalib"),
|
||||
(BACKEND_PEFILE, "pefile (file features only)"),
|
||||
(BACKEND_BINJA, "Binary Ninja"),
|
||||
(BACKEND_DOTNET, ".NET"),
|
||||
|
||||
@@ -183,7 +183,9 @@ known_first_party = [
|
||||
"binaryninja",
|
||||
"flirt",
|
||||
"ghidra",
|
||||
"ida",
|
||||
"ida_ida",
|
||||
"ida_auto",
|
||||
"ida_bytes",
|
||||
"ida_entry",
|
||||
"ida_funcs",
|
||||
|
||||
204
scripts/detect-backends.py
Normal file
204
scripts/detect-backends.py
Normal file
@@ -0,0 +1,204 @@
|
||||
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import importlib.util
|
||||
from typing import Optional
|
||||
from pathlib import Path
|
||||
|
||||
import rich
|
||||
import rich.table
|
||||
|
||||
from capa.features.extractors.ida.idalib import find_idalib, load_idalib, is_idalib_installed
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_desktop_entry(name: str) -> Optional[Path]:
    """
    Find the path for the given XDG Desktop Entry name.

    Like:

        >> get_desktop_entry("com.vector35.binaryninja.desktop")
        Path("~/.local/share/applications/com.vector35.binaryninja.desktop")

    The user data directory ($XDG_DATA_HOME, default ~/.local/share) is searched
    first, followed by each directory in $XDG_DATA_DIRS
    (default /usr/local/share:/usr/share). Within each, the entry is looked up
    in the `applications` subdirectory.

    Args:
        name: file name of the desktop entry; must end with ".desktop".

    Returns:
        Path to the first matching desktop entry, or None if there is none.
    """
    assert sys.platform in ("linux", "linux2")
    assert name.endswith(".desktop")

    # an unset *or empty* variable falls back to the default.
    data_home = os.environ.get("XDG_DATA_HOME") or f"{Path.home()}/.local/share"
    data_dirs = os.environ.get("XDG_DATA_DIRS") or "/usr/local/share:/usr/share"

    for data_dir in [data_home, *data_dirs.split(":")]:
        if not data_dir:
            # skip empty components, like in "a::b",
            # which would otherwise resolve relative to the working directory.
            continue

        # note: the data dir itself must not already include "applications".
        application = Path(data_dir) / "applications" / name
        if application.exists():
            return application

    return None
|
||||
|
||||
|
||||
def get_binaryninja_path(desktop_entry: Path) -> Optional[Path]:
    """Extract the Binary Ninja installation directory from its desktop entry file.

    The first line of the form `Exec=<dir>binaryninja %u` determines the result:

        from: Exec=/home/wballenthin/software/binaryninja/binaryninja %u
        to:   /home/wballenthin/software/binaryninja/

    Returns:
        The directory preceding the executable name if it exists on disk,
        otherwise None. None is also returned when no such line is present.
    """
    prefix = "Exec="
    suffix = "binaryninja %u"

    for entry_line in desktop_entry.read_text(encoding="utf-8").splitlines():
        if not (entry_line.startswith(prefix) and entry_line.endswith(suffix)):
            continue

        # strip the key and the executable name, leaving only the directory.
        install_dir = Path(entry_line[len(prefix) : -len(suffix)])
        return install_dir if install_dir.exists() else None

    return None
|
||||
|
||||
|
||||
def find_binaryninja() -> Optional[Path]:
    """Locate the directory containing the Binary Ninja Python API via its desktop entry.

    Returns:
        The `python` directory of the Binary Ninja installation, provided it
        contains `binaryninja/__init__.py`; None if any lookup step fails.

    Raises:
        NotImplementedError: on any platform other than Linux.
    """
    if sys.platform not in ("linux", "linux2"):
        raise NotImplementedError(f"unsupported platform: {sys.platform}")
    logger.debug("detected OS: linux")

    desktop_entry = get_desktop_entry("com.vector35.binaryninja.desktop")
    if not desktop_entry:
        return None
    logger.debug("found Binary Ninja application: %s", desktop_entry)

    binaryninja_path = get_binaryninja_path(desktop_entry)
    if not binaryninja_path:
        return None
    logger.debug("found Binary Ninja installation: %s", binaryninja_path)

    # the API lives in <install dir>/python/binaryninja/
    module_path = binaryninja_path / "python"
    if module_path.exists() and (module_path / "binaryninja" / "__init__.py").exists():
        return module_path

    return None
|
||||
|
||||
|
||||
def is_binaryninja_installed() -> bool:
    """Is the binaryninja module ready to import?

    Only the import machinery is queried; the module itself is not imported.
    """
    try:
        spec = importlib.util.find_spec("binaryninja")
    except ModuleNotFoundError:
        return False
    return spec is not None
|
||||
|
||||
|
||||
def has_binaryninja() -> bool:
    """Report whether the Binary Ninja API is available, either importable or discoverable on disk.

    Returns:
        True if the `binaryninja` module is importable as-is or an
        installation providing it was found; False otherwise.
    """
    if is_binaryninja_installed():
        logger.debug("found installed Binary Ninja API")
        return True

    logger.debug("Binary Ninja API not installed, searching...")

    binaryninja_path = find_binaryninja()
    if not binaryninja_path:
        logger.debug("failed to find Binary Ninja installation")
        # return here so we don't also log a misleading "found ...: None" message.
        return False

    logger.debug("found Binary Ninja API: %s", binaryninja_path)
    return True
|
||||
|
||||
|
||||
def load_binaryninja() -> bool:
    """Import the `binaryninja` module, extending `sys.path` with a discovered installation if needed.

    Returns:
        True if `binaryninja` could be imported, either directly or after
        appending the directory reported by `find_binaryninja()` to `sys.path`;
        False otherwise.
    """
    try:
        import binaryninja  # noqa: F401 unused import
    except ImportError:
        pass
    else:
        return True

    # not importable as-is: look for an installation and retry from there.
    binaryninja_path = find_binaryninja()
    if not binaryninja_path:
        return False

    sys.path.append(binaryninja_path.absolute().as_posix())
    try:
        import binaryninja  # noqa: F401 unused import
    except ImportError:
        return False
    return True
|
||||
|
||||
|
||||
def is_vivisect_installed() -> bool:
    """Report whether the `vivisect` module can be located on the current import path.

    Only the import machinery is queried; the module itself is not imported.
    """
    try:
        spec = importlib.util.find_spec("vivisect")
    except ModuleNotFoundError:
        return False
    return spec is not None
|
||||
|
||||
|
||||
def load_vivisect() -> bool:
    """Try to import the `vivisect` module and report whether the import succeeded."""
    try:
        import vivisect  # noqa: F401 unused import
    except ImportError:
        return False
    else:
        return True
|
||||
|
||||
|
||||
def main():
    """Print a table describing, per analysis backend, whether it is installed, findable, and loadable.

    Columns:
      - "already installed?": the backend module is importable as-is.
      - "found?": "-" when already installed; otherwise whether an installation
        was located on disk ("False" for vivisect, which has no locator).
      - "loads?": whether importing the backend module actually succeeded.
    """
    logging.basicConfig(level=logging.INFO)

    table = rich.table.Table()
    for heading in ("backend", "already installed?", "found?", "loads?"):
        table.add_column(heading)

    # (label, is-installed probe, on-disk locator or None, loader)
    backends = (
        ("vivisect", is_vivisect_installed, None, load_vivisect),
        ("Binary Ninja", is_binaryninja_installed, find_binaryninja, load_binaryninja),
        ("IDA idalib", is_idalib_installed, find_idalib, load_idalib),
    )

    for label, is_installed, find, load in backends:
        if is_installed():
            installed, found = "True", "-"
        elif find is None:
            installed, found = "False", "False"
        else:
            installed, found = "False", str(find() is not None)

        table.add_row(label, installed, found, str(load()))

    rich.print(table)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user