Compare commits

...

2 Commits

Author SHA1 Message Date
Willi Ballenthin
02b59301dd add idalib backend 2024-09-20 10:47:21 +00:00
Willi Ballenthin
a8e52615a3 introduce script to detect 3P backends
ref #2376
2024-09-20 09:03:46 +00:00
9 changed files with 405 additions and 4 deletions

View File

@@ -4,6 +4,8 @@
### New Features
- add IDA v9.0 backend via idalib #2376 @williballenthin
### Breaking Changes
### New Rules (0)

View File

@@ -20,9 +20,9 @@ from importlib import util
spec = util.find_spec('binaryninja')
if spec is not None:
if len(spec.submodule_search_locations) > 0:
path = Path(spec.submodule_search_locations[0])
# encode the path with utf8 then convert to hex, make sure it can be read and restored properly
print(str(path.parent).encode('utf8').hex())
path = Path(spec.submodule_search_locations[0])
# encode the path with utf8 then convert to hex, make sure it can be read and restored properly
print(str(path.parent).encode('utf8').hex())
"""

View File

@@ -32,7 +32,9 @@ class IdaFeatureExtractor(StaticFeatureExtractor):
def __init__(self):
super().__init__(
hashes=SampleHashes(
md5=ida_nalt.retrieve_input_file_md5(), sha1="(unknown)", sha256=ida_nalt.retrieve_input_file_sha256()
md5=ida_nalt.retrieve_input_file_md5().hex(),
sha1="(unknown)",
sha256=ida_nalt.retrieve_input_file_sha256().hex(),
)
)
self.global_features: List[Tuple[Feature, Address]] = []

View File

@@ -0,0 +1,113 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import os
import sys
import json
import logging
import importlib.util
from typing import Optional
from pathlib import Path
logger = logging.getLogger(__name__)
def is_idalib_installed() -> bool:
try:
return importlib.util.find_spec("ida") is not None
except ModuleNotFoundError:
return False
def get_idalib_user_config_path() -> Optional[Path]:
"""Get the path to the user's config file based on platform following IDA's user directories."""
# derived from `py-activate-idalib.py` from IDA v9.0 Beta 4
if sys.platform == "win32":
# On Windows, use the %APPDATA%\Hex-Rays\IDA Pro directory
config_dir = Path(os.getenv("APPDATA")) / "Hex-Rays" / "IDA Pro"
else:
# On macOS and Linux, use ~/.idapro
config_dir = Path.home() / ".idapro"
# Return the full path to the config file (now in JSON format)
user_config_path = config_dir / "ida-config.json"
if not user_config_path.exists():
return None
return user_config_path
def find_idalib() -> Optional[Path]:
config_path = get_idalib_user_config_path()
if not config_path:
return None
config = json.loads(config_path.read_text(encoding="utf-8"))
try:
ida_install_dir = Path(config["Paths"]["ida-install-dir"])
except KeyError:
return None
if not ida_install_dir.exists():
return None
libname = {
"win32": "idalib.dll",
"linux": "libidalib.so",
"linux2": "libidalib.so",
"darwin": "libidalib.dylib",
}[sys.platform]
if not (ida_install_dir / "ida.hlp").is_file():
return None
if not (ida_install_dir / libname).is_file():
return None
idalib_path = ida_install_dir / "idalib" / "python"
if not idalib_path.exists():
return None
if not (idalib_path / "ida" / "__init__.py").is_file():
return None
return idalib_path
def has_idalib() -> bool:
if is_idalib_installed():
logger.debug("found installed IDA idalib API")
return True
logger.debug("IDA idalib API not installed, searching...")
idalib_path = find_idalib()
if not idalib_path:
logger.debug("failed to find IDA idalib installation")
logger.debug("found IDA idalib API: %s", idalib_path)
return idalib_path is not None
def load_idalib() -> bool:
try:
import ida
return True
except ImportError:
idalib_path = find_idalib()
if not idalib_path:
return False
sys.path.append(idalib_path.absolute().as_posix())
try:
import ida # noqa: F401 unused import
return True
except ImportError:
return False

View File

@@ -6,9 +6,12 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import os
import io
import sys
import gzip
import ctypes
import inspect
import tempfile
import logging
import contextlib
import importlib.util
@@ -81,6 +84,47 @@ def assert_never(value) -> NoReturn:
assert False, f"Unhandled value: {value} ({type(value).__name__})" # noqa: B011
# Redirect stdout at the C runtime level,
# which lets us handle native libraries that spam stdout.
# via: https://eli.thegreenplace.net/2015/redirecting-all-kinds-of-stdout-in-python/
LIBC = ctypes.CDLL(None)
C_STDOUT = ctypes.c_void_p.in_dll(LIBC, "stdout")
@contextlib.contextmanager
def stdout_redirector(stream):
# The original fd stdout points to. Usually 1 on POSIX systems.
original_stdout_fd = sys.stdout.fileno()
def _redirect_stdout(to_fd):
"""Redirect stdout to the given file descriptor."""
# Flush the C-level buffer stdout
LIBC.fflush(C_STDOUT)
# Flush and close sys.stdout - also closes the file descriptor (fd)
sys.stdout.close()
# Make original_stdout_fd point to the same file as to_fd
os.dup2(to_fd, original_stdout_fd)
# Create a new sys.stdout that points to the redirected fd
sys.stdout = io.TextIOWrapper(os.fdopen(original_stdout_fd, 'wb'))
# Save a copy of the original stdout fd in saved_stdout_fd
saved_stdout_fd = os.dup(original_stdout_fd)
try:
# Create a temporary file and redirect stdout to it
tfile = tempfile.TemporaryFile(mode='w+b')
_redirect_stdout(tfile.fileno())
# Yield to caller, then redirect stdout back to the saved fd
yield
_redirect_stdout(saved_stdout_fd)
# Copy contents of temporary file to the given stream
tfile.flush()
tfile.seek(0, io.SEEK_SET)
stream.write(tfile.read())
finally:
tfile.close()
os.close(saved_stdout_fd)
def load_json_from_path(json_path: Path):
with gzip.open(json_path, "r") as compressed_report:
try:

View File

@@ -5,6 +5,7 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import io
import os
import sys
import logging
@@ -69,6 +70,7 @@ BACKEND_DRAKVUF = "drakvuf"
BACKEND_VMRAY = "vmray"
BACKEND_FREEZE = "freeze"
BACKEND_BINEXPORT2 = "binexport2"
BACKEND_IDA = "ida"
class CorruptFile(ValueError):
@@ -321,6 +323,36 @@ def get_extractor(
return capa.features.extractors.binexport2.extractor.BinExport2FeatureExtractor(be2, buf)
elif backend == BACKEND_IDA:
import capa.features.extractors.ida.idalib as idalib
if not idalib.has_idalib():
raise RuntimeError(
# TODO(williballenthin): add more details here
"cannot find IDA idalib module."
)
if not idalib.load_idalib():
raise RuntimeError("failed to load IDA idalib module.")
import ida
import ida_auto
import capa.features.extractors.ida.extractor
logger.debug("idalib: opening database...")
# idalib writes to stdout (ugh), so we have to capture that
# so as not to screw up structured output.
with capa.helpers.stdout_redirector(io.BytesIO()):
if ida.open_database(str(input_path), run_auto_analysis=True):
raise RuntimeError("failed to analyze input file")
logger.debug("idalib: waiting for analysis...")
ida_auto.auto_wait()
logger.debug("idalib: opened database.")
return capa.features.extractors.ida.extractor.IdaFeatureExtractor()
else:
raise ValueError("unexpected backend: " + backend)

View File

@@ -43,6 +43,7 @@ import capa.features.extractors.common
from capa.rules import RuleSet
from capa.engine import MatchResults
from capa.loader import (
BACKEND_IDA,
BACKEND_VIV,
BACKEND_CAPE,
BACKEND_BINJA,
@@ -283,6 +284,7 @@ def install_common_args(parser, wanted=None):
backends = [
(BACKEND_AUTO, "(default) detect appropriate backend automatically"),
(BACKEND_VIV, "vivisect"),
(BACKEND_IDA, "IDA via idalib"),
(BACKEND_PEFILE, "pefile (file features only)"),
(BACKEND_BINJA, "Binary Ninja"),
(BACKEND_DOTNET, ".NET"),

View File

@@ -183,7 +183,9 @@ known_first_party = [
"binaryninja",
"flirt",
"ghidra",
"ida",
"ida_ida",
"ida_auto",
"ida_bytes",
"ida_entry",
"ida_funcs",

204
scripts/detect-backends.py Normal file
View File

@@ -0,0 +1,204 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import os
import sys
import logging
import importlib.util
from typing import Optional
from pathlib import Path
import rich
import rich.table
from capa.features.extractors.ida.idalib import find_idalib, load_idalib, is_idalib_installed
logger = logging.getLogger(__name__)
def get_desktop_entry(name: str) -> Optional[Path]:
"""
Find the path for the given XDG Desktop Entry name.
Like:
>> get_desktop_entry("com.vector35.binaryninja.desktop")
Path("~/.local/share/applications/com.vector35.binaryninja.desktop")
"""
assert sys.platform in ("linux", "linux2")
assert name.endswith(".desktop")
default_data_dirs = f"/usr/share/applications:{Path.home()}/.local/share"
data_dirs = os.environ.get("XDG_DATA_DIRS", default_data_dirs)
for data_dir in data_dirs.split(":"):
applications = Path(data_dir) / "applications"
for application in applications.glob("*.desktop"):
if application.name == name:
return application
return None
def get_binaryninja_path(desktop_entry: Path) -> Optional[Path]:
# from: Exec=/home/wballenthin/software/binaryninja/binaryninja %u
# to: /home/wballenthin/software/binaryninja/
for line in desktop_entry.read_text(encoding="utf-8").splitlines():
if not line.startswith("Exec="):
continue
if not line.endswith("binaryninja %u"):
continue
binaryninja_path = Path(line[len("Exec=") : -len("binaryninja %u")])
if not binaryninja_path.exists():
return None
return binaryninja_path
return None
def find_binaryninja() -> Optional[Path]:
if sys.platform == "linux" or sys.platform == "linux2":
# ok
logger.debug("detected OS: linux")
elif sys.platform == "darwin":
raise NotImplementedError(f"unsupported platform: {sys.platform}")
elif sys.platform == "win32":
raise NotImplementedError(f"unsupported platform: {sys.platform}")
else:
raise NotImplementedError(f"unsupported platform: {sys.platform}")
desktop_entry = get_desktop_entry("com.vector35.binaryninja.desktop")
if not desktop_entry:
return None
logger.debug("found Binary Ninja application: %s", desktop_entry)
binaryninja_path = get_binaryninja_path(desktop_entry)
if not binaryninja_path:
return None
logger.debug("found Binary Ninja installation: %s", binaryninja_path)
module_path = binaryninja_path / "python"
if not module_path.exists():
return None
if not (module_path / "binaryninja" / "__init__.py").exists():
return None
return module_path
def is_binaryninja_installed() -> bool:
"""Is the binaryninja module ready to import?"""
try:
return importlib.util.find_spec("binaryninja") is not None
except ModuleNotFoundError:
return False
def has_binaryninja() -> bool:
if is_binaryninja_installed():
logger.debug("found installed Binary Ninja API")
return True
logger.debug("Binary Ninja API not installed, searching...")
binaryninja_path = find_binaryninja()
if not binaryninja_path:
logger.debug("failed to find Binary Ninja installation")
logger.debug("found Binary Ninja API: %s", binaryninja_path)
return binaryninja_path is not None
def load_binaryninja() -> bool:
try:
import binaryninja
return True
except ImportError:
binaryninja_path = find_binaryninja()
if not binaryninja_path:
return False
sys.path.append(binaryninja_path.absolute().as_posix())
try:
import binaryninja # noqa: F401 unused import
return True
except ImportError:
return False
def is_vivisect_installed() -> bool:
try:
return importlib.util.find_spec("vivisect") is not None
except ModuleNotFoundError:
return False
def load_vivisect() -> bool:
try:
import vivisect # noqa: F401 unused import
return True
except ImportError:
return False
def main():
logging.basicConfig(level=logging.INFO)
table = rich.table.Table()
table.add_column("backend")
table.add_column("already installed?")
table.add_column("found?")
table.add_column("loads?")
if True:
row = ["vivisect"]
if is_vivisect_installed():
row.append("True")
row.append("-")
else:
row.append("False")
row.append("False")
row.append(str(load_vivisect()))
table.add_row(*row)
if True:
row = ["Binary Ninja"]
if is_binaryninja_installed():
row.append("True")
row.append("-")
else:
row.append("False")
row.append(str(find_binaryninja() is not None))
row.append(str(load_binaryninja()))
table.add_row(*row)
if True:
row = ["IDA idalib"]
if is_idalib_installed():
row.append("True")
row.append("-")
else:
row.append("False")
row.append(str(find_idalib() is not None))
row.append(str(load_idalib()))
table.add_row(*row)
rich.print(table)
if __name__ == "__main__":
main()