mirror of
https://github.com/mandiant/capa.git
synced 2026-01-23 01:39:00 -08:00
* updating copyright, back to the date of origin of file * updating regex to account for linter violation
241 lines
7.8 KiB
Python
241 lines
7.8 KiB
Python
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and limitations under the License.
|
|
import sys
|
|
import gzip
|
|
import json
|
|
import inspect
|
|
import logging
|
|
import contextlib
|
|
import importlib.util
|
|
from typing import NoReturn
|
|
from pathlib import Path
|
|
|
|
import tqdm
|
|
|
|
from capa.exceptions import UnsupportedFormatError
|
|
from capa.features.common import (
|
|
FORMAT_PE,
|
|
FORMAT_CAPE,
|
|
FORMAT_SC32,
|
|
FORMAT_SC64,
|
|
FORMAT_DOTNET,
|
|
FORMAT_FREEZE,
|
|
FORMAT_UNKNOWN,
|
|
Format,
|
|
)
|
|
|
|
EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32")
|
|
EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64")
|
|
EXTENSIONS_DYNAMIC = ("json", "json_", "json.gz")
|
|
EXTENSIONS_ELF = "elf_"
|
|
EXTENSIONS_FREEZE = "frz"
|
|
|
|
logger = logging.getLogger("capa")
|
|
|
|
|
|
def hex(n: int) -> str:
|
|
"""render the given number using upper case hex, like: 0x123ABC"""
|
|
if n < 0:
|
|
return f"-0x{(-n):X}"
|
|
else:
|
|
return f"0x{(n):X}"
|
|
|
|
|
|
def get_file_taste(sample_path: Path) -> bytes:
|
|
if not sample_path.exists():
|
|
raise IOError(f"sample path {sample_path} does not exist or cannot be accessed")
|
|
taste = sample_path.open("rb").read(8)
|
|
return taste
|
|
|
|
|
|
def is_runtime_ida():
|
|
return importlib.util.find_spec("idc") is not None
|
|
|
|
|
|
def is_runtime_ghidra():
|
|
try:
|
|
currentProgram # type: ignore [name-defined] # noqa: F821
|
|
except NameError:
|
|
return False
|
|
return True
|
|
|
|
|
|
def assert_never(value) -> NoReturn:
|
|
# careful: python -O will remove this assertion.
|
|
# but this is only used for type checking, so it's ok.
|
|
assert False, f"Unhandled value: {value} ({type(value).__name__})" # noqa: B011
|
|
|
|
|
|
def load_json_from_path(json_path: Path):
|
|
with gzip.open(json_path, "r") as compressed_report:
|
|
try:
|
|
report_json = compressed_report.read()
|
|
except gzip.BadGzipFile:
|
|
report = json.load(json_path.open(encoding="utf-8"))
|
|
else:
|
|
report = json.loads(report_json)
|
|
return report
|
|
|
|
|
|
def get_format_from_report(sample: Path) -> str:
|
|
report = load_json_from_path(sample)
|
|
if "CAPE" in report:
|
|
return FORMAT_CAPE
|
|
|
|
if "target" in report and "info" in report and "behavior" in report:
|
|
# CAPE report that's missing the "CAPE" key,
|
|
# which is not going to be much use, but its correct.
|
|
return FORMAT_CAPE
|
|
|
|
return FORMAT_UNKNOWN
|
|
|
|
|
|
def get_format_from_extension(sample: Path) -> str:
|
|
format_ = FORMAT_UNKNOWN
|
|
if sample.name.endswith(EXTENSIONS_SHELLCODE_32):
|
|
format_ = FORMAT_SC32
|
|
elif sample.name.endswith(EXTENSIONS_SHELLCODE_64):
|
|
format_ = FORMAT_SC64
|
|
elif sample.name.endswith(EXTENSIONS_DYNAMIC):
|
|
format_ = get_format_from_report(sample)
|
|
elif sample.name.endswith(EXTENSIONS_FREEZE):
|
|
format_ = FORMAT_FREEZE
|
|
return format_
|
|
|
|
|
|
def get_auto_format(path: Path) -> str:
|
|
format_ = get_format(path)
|
|
if format_ == FORMAT_UNKNOWN:
|
|
format_ = get_format_from_extension(path)
|
|
if format_ == FORMAT_UNKNOWN:
|
|
raise UnsupportedFormatError()
|
|
return format_
|
|
|
|
|
|
def get_format(sample: Path) -> str:
|
|
# imported locally to avoid import cycle
|
|
from capa.features.extractors.common import extract_format
|
|
from capa.features.extractors.dotnetfile import DotnetFileFeatureExtractor
|
|
|
|
buf = sample.read_bytes()
|
|
|
|
for feature, _ in extract_format(buf):
|
|
if feature == Format(FORMAT_PE):
|
|
dnfile_extractor = DotnetFileFeatureExtractor(sample)
|
|
if dnfile_extractor.is_dotnet_file():
|
|
feature = Format(FORMAT_DOTNET)
|
|
|
|
assert isinstance(feature.value, str)
|
|
return feature.value
|
|
|
|
return FORMAT_UNKNOWN
|
|
|
|
|
|
@contextlib.contextmanager
|
|
def redirecting_print_to_tqdm(disable_progress):
|
|
"""
|
|
tqdm (progress bar) expects to have fairly tight control over console output.
|
|
so calls to `print()` will break the progress bar and make things look bad.
|
|
so, this context manager temporarily replaces the `print` implementation
|
|
with one that is compatible with tqdm.
|
|
via: https://stackoverflow.com/a/42424890/87207
|
|
"""
|
|
old_print = print # noqa: T202 [reserved word print used]
|
|
|
|
def new_print(*args, **kwargs):
|
|
# If tqdm.tqdm.write raises error, use builtin print
|
|
if disable_progress:
|
|
old_print(*args, **kwargs)
|
|
else:
|
|
try:
|
|
tqdm.tqdm.write(*args, **kwargs)
|
|
except Exception:
|
|
old_print(*args, **kwargs)
|
|
|
|
try:
|
|
# Globally replace print with new_print.
|
|
# Verified this works manually on Python 3.11:
|
|
# >>> import inspect
|
|
# >>> inspect.builtins
|
|
# <module 'builtins' (built-in)>
|
|
inspect.builtins.print = new_print # type: ignore
|
|
yield
|
|
finally:
|
|
inspect.builtins.print = old_print # type: ignore
|
|
|
|
|
|
def log_unsupported_format_error():
|
|
logger.error("-" * 80)
|
|
logger.error(" Input file does not appear to be a supported file.")
|
|
logger.error(" ")
|
|
logger.error(" See all supported file formats via capa's help output (-h).")
|
|
logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
|
|
logger.error("-" * 80)
|
|
|
|
|
|
def log_unsupported_cape_report_error(error: str):
|
|
logger.error("-" * 80)
|
|
logger.error(" Input file is not a valid CAPE report: %s", error)
|
|
logger.error(" ")
|
|
logger.error(" capa currently only supports analyzing standard CAPE reports in JSON format.")
|
|
logger.error(
|
|
" Please make sure your report file is in the standard format and contains both the static and dynamic sections."
|
|
)
|
|
logger.error("-" * 80)
|
|
|
|
|
|
def log_empty_cape_report_error(error: str):
|
|
logger.error("-" * 80)
|
|
logger.error(" CAPE report is empty or only contains little useful data: %s", error)
|
|
logger.error(" ")
|
|
logger.error(" Please make sure the sandbox run captures useful behaviour of your sample.")
|
|
logger.error("-" * 80)
|
|
|
|
|
|
def log_unsupported_os_error():
|
|
logger.error("-" * 80)
|
|
logger.error(" Input file does not appear to target a supported OS.")
|
|
logger.error(" ")
|
|
logger.error(
|
|
" capa currently only supports analyzing executables for some operating systems (including Windows and Linux)."
|
|
)
|
|
logger.error("-" * 80)
|
|
|
|
|
|
def log_unsupported_arch_error():
|
|
logger.error("-" * 80)
|
|
logger.error(" Input file does not appear to target a supported architecture.")
|
|
logger.error(" ")
|
|
logger.error(" capa currently only supports analyzing x86 (32- and 64-bit).")
|
|
logger.error("-" * 80)
|
|
|
|
|
|
def log_unsupported_runtime_error():
|
|
logger.error("-" * 80)
|
|
logger.error(" Unsupported runtime or Python interpreter.")
|
|
logger.error(" ")
|
|
logger.error(" capa supports running under Python 3.8 and higher.")
|
|
logger.error(" ")
|
|
logger.error(
|
|
" If you're seeing this message on the command line, please ensure you're running a supported Python version."
|
|
)
|
|
logger.error("-" * 80)
|
|
|
|
|
|
def is_running_standalone() -> bool:
|
|
"""
|
|
are we running from a PyInstaller'd executable?
|
|
if so, then we'll be able to access `sys._MEIPASS` for the packaged resources.
|
|
"""
|
|
# typically we only expect capa.main to be packaged via PyInstaller.
|
|
# therefore, this *should* be in capa.main; however,
|
|
# the Binary Ninja extractor uses this to resolve the BN API code,
|
|
# so we keep this in a common area.
|
|
# generally, other library code should not use this function.
|
|
return hasattr(sys, "frozen") and hasattr(sys, "_MEIPASS")
|