mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 15:49:46 -08:00
feat: start dotnet detection (#955)
* feat: start dotnet detection * Apply suggestions from code review Co-authored-by: Willi Ballenthin <willi.ballenthin@gmail.com> * refactor: dn instead of dotnet * refactor: format branches, extractor reorg * refactor: format selection and dotnet detect * feat: get format, arch, os * refactor: log errors and exceptions * ci: also test and build for dotnet-main dev * fix: import path * fix: circular dep * fix: remove buf argument feat: get runtime meta data * fix: log unsupported runtime error * fix: type ignore Co-authored-by: Willi Ballenthin <willi.ballenthin@gmail.com>
This commit is contained in:
2
.github/workflows/build.yml
vendored
2
.github/workflows/build.yml
vendored
@@ -2,7 +2,7 @@ name: build
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
branches: [master, dotnet-main]
|
||||
release:
|
||||
types: [edited, published]
|
||||
|
||||
|
||||
4
.github/workflows/tests.yml
vendored
4
.github/workflows/tests.yml
vendored
@@ -2,9 +2,9 @@ name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
branches: [ master, dotnet-main ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
branches: [ master, dotnet-main ]
|
||||
|
||||
# save workspaces to speed up testing
|
||||
env:
|
||||
|
||||
11
CHANGELOG.md
11
CHANGELOG.md
@@ -4,14 +4,15 @@
|
||||
|
||||
### New Features
|
||||
|
||||
- add new scope "instruction" for matching mnemonics and operands #767 @williballenthin
|
||||
- add new feature "operand[{0, 1, 2}].number" for matching instruction operand immediate values #767 @williballenthin
|
||||
- add new feature "operand[{0, 1, 2}].offset" for matching instruction operand offsets #767 @williballenthin
|
||||
- add new scope "instruction" for matching mnemonics and operands #767 @williballenthin
|
||||
- add new feature "operand[{0, 1, 2}].number" for matching instruction operand immediate values #767 @williballenthin
|
||||
- add new feature "operand[{0, 1, 2}].offset" for matching instruction operand offsets #767 @williballenthin
|
||||
- main: detect dotnet binaries #955 @mr-tz
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
- instruction scope and operand feature are new and are not backwards compatible with older versions of capa
|
||||
- Python 3.7 is now the minimum supported Python version #866 @williballenthin
|
||||
- instruction scope and operand feature are new and are not backwards compatible with older versions of capa
|
||||
- Python 3.7 is now the minimum supported Python version #866 @williballenthin
|
||||
|
||||
### New Rules (4)
|
||||
|
||||
|
||||
14
capa/exceptions.py
Normal file
14
capa/exceptions.py
Normal file
@@ -0,0 +1,14 @@
|
||||
class UnsupportedRuntimeError(RuntimeError):
    """Signals that capa is running under an unsupported runtime or Python interpreter."""
|
||||
|
||||
|
||||
class UnsupportedFormatError(ValueError):
    """Signals that the input file is not in a format that capa can analyze."""
|
||||
|
||||
|
||||
class UnsupportedArchError(ValueError):
    """Signals that the input file does not target a supported architecture."""
|
||||
|
||||
|
||||
class UnsupportedOSError(ValueError):
    """Signals that the input file does not target a supported operating system."""
|
||||
@@ -410,7 +410,9 @@ VALID_BITNESS = (BITNESS_X32, BITNESS_X64)
|
||||
# other candidates here: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types
|
||||
ARCH_I386 = "i386"
|
||||
ARCH_AMD64 = "amd64"
|
||||
VALID_ARCH = (ARCH_I386, ARCH_AMD64)
|
||||
# dotnet
|
||||
ARCH_ANY = "any"
|
||||
VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY)
|
||||
|
||||
|
||||
class Arch(Feature):
|
||||
@@ -422,8 +424,10 @@ class Arch(Feature):
|
||||
OS_WINDOWS = "windows"
|
||||
OS_LINUX = "linux"
|
||||
OS_MACOS = "macos"
|
||||
# dotnet
|
||||
OS_ANY = "any"
|
||||
VALID_OS = {os.value for os in capa.features.extractors.elf.OS}
|
||||
VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS})
|
||||
VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS, OS_ANY})
|
||||
|
||||
|
||||
class OS(Feature):
|
||||
@@ -434,7 +438,14 @@ class OS(Feature):
|
||||
|
||||
FORMAT_PE = "pe"
|
||||
FORMAT_ELF = "elf"
|
||||
VALID_FORMAT = (FORMAT_PE, FORMAT_ELF)
|
||||
FORMAT_DOTNET = "dotnet"
|
||||
VALID_FORMAT = (FORMAT_PE, FORMAT_ELF, FORMAT_DOTNET)
|
||||
# internal only, not to be used in rules
|
||||
FORMAT_AUTO = "auto"
|
||||
FORMAT_SC32 = "sc32"
|
||||
FORMAT_SC64 = "sc64"
|
||||
FORMAT_FREEZE = "freeze"
|
||||
FORMAT_UNKNOWN = "unknown"
|
||||
|
||||
|
||||
class Format(Feature):
|
||||
|
||||
@@ -8,7 +8,8 @@ import pefile
|
||||
import capa.features
|
||||
import capa.features.extractors.elf
|
||||
import capa.features.extractors.pefile
|
||||
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, Arch, Format, String
|
||||
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, FORMAT_FREEZE, Arch, Format, String
|
||||
from capa.features.freeze import is_freeze
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -29,6 +30,8 @@ def extract_format(buf):
|
||||
yield Format(FORMAT_PE), 0x0
|
||||
elif buf.startswith(b"\x7fELF"):
|
||||
yield Format(FORMAT_ELF), 0x0
|
||||
elif is_freeze(buf):
|
||||
yield Format(FORMAT_FREEZE), 0x0
|
||||
else:
|
||||
# we likely end up here:
|
||||
# 1. handling a file format (e.g. macho)
|
||||
|
||||
105
capa/features/extractors/dnfile_.py
Normal file
105
capa/features/extractors/dnfile_.py
Normal file
@@ -0,0 +1,105 @@
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
import dnfile
|
||||
|
||||
from capa.features.common import OS, OS_ANY, ARCH_ANY, ARCH_I386, ARCH_AMD64, FORMAT_DOTNET, Arch, Format, Feature
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_file_format(**kwargs):
    """
    yield the single file format feature for a .NET module.

    keyword arguments are accepted so the handler can be invoked uniformly
    with the other handlers; none of them are inspected.
    """
    dotnet_format = Format(FORMAT_DOTNET)
    yield dotnet_format, 0x0
|
||||
|
||||
|
||||
def extract_file_os(**kwargs):
    """
    yield the OS feature for a .NET module, which is always `OS_ANY`.

    keyword arguments are accepted so the handler can be invoked uniformly
    with the other handlers; none of them are inspected.
    """
    any_os = OS(OS_ANY)
    yield any_os, 0x0
|
||||
|
||||
|
||||
def extract_file_arch(pe, **kwargs):
    """
    yield the architecture feature for a .NET module.

    the decision combines the CLR header flag 32BITREQUIRED with the PE
    optional header magic (PE32 vs. PE32+), following the combinations
    documented for CorFlags.exe:

      PE32  + 32BITREQUIRED      -> i386  (x86, and "any CPU, 32-bit preferred")
      PE32+ + not 32BITREQUIRED  -> amd64
      anything else              -> any   (e.g. plain "any CPU": PE32, flag clear)

    looking at the two CLR flags alone is not sufficient: an x86-only module
    sets 32BITREQUIRED but not 32BITPREFERRED, and a plain "any CPU" module
    clears both, just like an x64 module does.

    args:
      pe: the parsed module; must expose `net.Flags` and `PE_TYPE`.
      **kwargs: ignored; accepted so all handlers can be invoked uniformly.

    yields:
      Tuple[Arch, int]: exactly one architecture feature, at address 0x0.
    """
    # optional header magic values (pefile exposes the same numbers as
    # OPTIONAL_HEADER_MAGIC_PE and OPTIONAL_HEADER_MAGIC_PE_PLUS).
    # kept local so that this handler does not need an additional import.
    pe32_magic = 0x10B
    pe32_plus_magic = 0x20B

    # via https://stackoverflow.com/a/23614024/10548020
    # note: the "32-bit preferred" flag was only introduced with .NET 4.5,
    # so it is intentionally not required for the i386 decision.
    requires_32bit = bool(pe.net.Flags.CLR_32BITREQUIRED)

    if requires_32bit and pe.PE_TYPE == pe32_magic:
        yield Arch(ARCH_I386), 0x0
    elif not requires_32bit and pe.PE_TYPE == pe32_plus_magic:
        yield Arch(ARCH_AMD64), 0x0
    else:
        yield Arch(ARCH_ANY), 0x0
|
||||
|
||||
|
||||
def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
    """
    yield (feature, address) pairs produced by every registered file-scope handler.

    each handler in `FILE_HANDLERS` is called with the parsed module passed
    as the keyword argument `pe`.
    """
    for extract in FILE_HANDLERS:
        for pair in extract(pe=pe):  # type: ignore
            feature, va = pair
            yield feature, va


# handlers invoked by `extract_file_features`, in order.
# the commented-out names are placeholders and are not registered.
FILE_HANDLERS = (
    # extract_file_export_names,
    # extract_file_import_names,
    # extract_file_section_names,
    # extract_file_strings,
    # extract_file_function_names,
    extract_file_format,
)
|
||||
|
||||
|
||||
def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
    """
    yield (feature, address) pairs produced by every registered global handler.

    each handler in `GLOBAL_HANDLERS` is called with the parsed module passed
    as the keyword argument `pe`.
    """
    for extract in GLOBAL_HANDLERS:
        for pair in extract(pe=pe):  # type: ignore
            feature, va = pair
            yield feature, va


# handlers invoked by `extract_global_features`, in order.
GLOBAL_HANDLERS = (
    extract_file_os,
    extract_file_arch,
)
|
||||
|
||||
|
||||
class DnfileFeatureExtractor(FeatureExtractor):
    """
    file-scope feature extractor for .NET modules, backed by dnfile.

    only global and file features are provided; every function-, basic block-,
    and instruction-level method raises NotImplementedError.
    """

    def __init__(self, path: str):
        """parse the file at `path` with `dnfile.dnPE` and keep both the path and the parsed module."""
        super().__init__()
        self.path: str = path
        self.pe: dnfile.dnPE = dnfile.dnPE(path)

    def get_base_address(self) -> int:
        """
        return the `EntryPointTokenOrRva` field of the module's .NET header struct.

        NOTE(review): this is an entry point token/RVA rather than an image base; confirm this is intended.
        """
        net_header = self.pe.net.struct
        return net_header.EntryPointTokenOrRva

    def extract_global_features(self):
        """yield the global features of the parsed module via the module-level `extract_global_features`."""
        yield from extract_global_features(self.pe)

    def extract_file_features(self):
        """yield the file features of the parsed module via the module-level `extract_file_features`."""
        yield from extract_file_features(self.pe)

    def is_dotnet_file(self) -> bool:
        """return True when the parsed module's `net` attribute is truthy, False otherwise."""
        return True if self.pe.net else False

    def get_runtime_version(self) -> Tuple[int, int]:
        """return the (major, minor) runtime version fields of the .NET header struct."""
        net_header = self.pe.net.struct
        return net_header.MajorRuntimeVersion, net_header.MinorRuntimeVersion

    def get_meta_version_string(self) -> str:
        """return the metadata `Version` field, decoded as UTF-8."""
        raw_version = self.pe.net.metadata.struct.Version
        return raw_version.decode("utf-8")

    # the remaining extractor methods are deliberately unsupported:
    # this extractor only handles the global and file scopes.

    def get_functions(self):
        """not supported by this extractor."""
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def extract_function_features(self, f):
        """not supported by this extractor."""
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def get_basic_blocks(self, f):
        """not supported by this extractor."""
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def extract_basic_block_features(self, f, bb):
        """not supported by this extractor."""
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def get_instructions(self, f, bb):
        """not supported by this extractor."""
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def extract_insn_features(self, f, bb, insn):
        """not supported by this extractor."""
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def is_library_function(self, va):
        """not supported by this extractor."""
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def get_function_name(self, va):
        """not supported by this extractor."""
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")
|
||||
@@ -53,13 +53,12 @@ import zlib
|
||||
import logging
|
||||
from typing import Dict, Type
|
||||
|
||||
import capa.helpers
|
||||
import capa.features.file
|
||||
import capa.features.insn
|
||||
import capa.features.common
|
||||
import capa.features.basicblock
|
||||
import capa.features.extractors.base_extractor
|
||||
from capa.helpers import hex
|
||||
from capa.features.common import Feature
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -87,6 +86,7 @@ def dumps(extractor):
|
||||
returns:
|
||||
str: the serialized features.
|
||||
"""
|
||||
hex = capa.helpers.hex
|
||||
ret = {
|
||||
"version": 1,
|
||||
"base address": extractor.get_base_address(),
|
||||
|
||||
@@ -5,10 +5,20 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import os
|
||||
import logging
|
||||
from typing import NoReturn
|
||||
|
||||
from capa.exceptions import UnsupportedFormatError
|
||||
from capa.features.common import FORMAT_SC32, FORMAT_SC64, FORMAT_UNKNOWN
|
||||
from capa.features.extractors.common import extract_format
|
||||
|
||||
EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32")
|
||||
EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64")
|
||||
|
||||
|
||||
logger = logging.getLogger("capa")
|
||||
|
||||
_hex = hex
|
||||
|
||||
|
||||
@@ -35,3 +45,72 @@ def is_runtime_ida():
|
||||
|
||||
def assert_never(value: NoReturn) -> NoReturn:
    """
    fail loudly when supposedly unreachable code is reached.

    intended for the final branch of an exhaustive if/elif chain, so that a
    type checker can verify all cases are handled.

    args:
      value: the value that was not handled by any earlier branch.

    raises:
      AssertionError: always, naming the value and its type.
    """
    # raise explicitly rather than `assert False`: assert statements are removed
    # when Python runs with -O, which would let this NoReturn function return.
    raise AssertionError(f"Unhandled value: {value} ({type(value).__name__})")
|
||||
|
||||
|
||||
def get_format_from_extension(sample: str) -> str:
    """
    guess the shellcode format of a sample from the end of its path.

    returns:
      str: FORMAT_SC32 or FORMAT_SC64 when the path ends with one of the known
        shellcode extensions, otherwise FORMAT_UNKNOWN.
    """
    # checked in order; the first matching group of extensions wins.
    formats_by_extension = (
        (EXTENSIONS_SHELLCODE_32, FORMAT_SC32),
        (EXTENSIONS_SHELLCODE_64, FORMAT_SC64),
    )
    for extensions, shellcode_format in formats_by_extension:
        if sample.endswith(extensions):
            return shellcode_format
    return FORMAT_UNKNOWN
|
||||
|
||||
|
||||
def get_auto_format(path: str) -> str:
    """
    detect the format of the file at `path`: first from its content, then from its extension.

    returns:
      str: the detected format.

    raises:
      UnsupportedFormatError: when neither the content nor the extension identifies a format.
    """
    detected = get_format(path)
    if detected != FORMAT_UNKNOWN:
        return detected

    # content was inconclusive, so fall back to the file extension (shellcode).
    detected = get_format_from_extension(path)
    if detected == FORMAT_UNKNOWN:
        raise UnsupportedFormatError()
    return detected
|
||||
|
||||
|
||||
def get_format(sample: str) -> str:
    """
    detect the format of the file at `sample` from its content.

    the whole file is read and handed to `extract_format`; the value of the
    first format feature it yields is returned.

    returns:
      str: the detected format, or FORMAT_UNKNOWN when no format feature is yielded.
    """
    with open(sample, "rb") as f:
        buf = f.read()

    # only the first yielded feature is of interest.
    first = next(iter(extract_format(buf)), None)
    if first is None:
        return FORMAT_UNKNOWN

    feature, _ = first
    assert isinstance(feature.value, str)
    return feature.value
|
||||
|
||||
|
||||
def log_unsupported_format_error():
    """log an error banner explaining that the input is not a supported file format."""
    rule = "-" * 80
    messages = (
        rule,
        " Input file does not appear to be a PE or ELF file.",
        " ",
        " capa currently only supports analyzing PE and ELF files (or shellcode, when using --format sc32|sc64).",
        " If you don't know the input file type, you can try using the `file` utility to guess it.",
        rule,
    )
    for message in messages:
        logger.error(message)
|
||||
|
||||
|
||||
def log_unsupported_os_error():
    """log an error banner explaining that the input does not target a supported OS."""
    rule = "-" * 80
    messages = (
        rule,
        " Input file does not appear to target a supported OS.",
        " ",
        " capa currently only supports analyzing executables for some operating systems (including Windows and Linux).",
        rule,
    )
    for message in messages:
        logger.error(message)
|
||||
|
||||
|
||||
def log_unsupported_arch_error():
    """log an error banner explaining that the input does not target a supported architecture."""
    rule = "-" * 80
    messages = (
        rule,
        " Input file does not appear to target a supported architecture.",
        " ",
        " capa currently only supports analyzing x86 (32- and 64-bit).",
        rule,
    )
    for message in messages:
        logger.error(message)
|
||||
|
||||
|
||||
def log_unsupported_runtime_error():
    """log an error banner explaining that the current runtime or Python interpreter is not supported."""
    rule = "-" * 80
    messages = (
        rule,
        " Unsupported runtime or Python interpreter.",
        " ",
        " capa supports running under Python 3.7 and higher.",
        " ",
        " If you're seeing this message on the command line, please ensure you're running a supported Python version.",
        rule,
    )
    for message in messages:
        logger.error(message)
|
||||
|
||||
210
capa/main.py
210
capa/main.py
@@ -41,18 +41,35 @@ import capa.render.vverbose
|
||||
import capa.features.extractors
|
||||
import capa.features.extractors.common
|
||||
import capa.features.extractors.pefile
|
||||
import capa.features.extractors.dnfile_
|
||||
import capa.features.extractors.elffile
|
||||
from capa.rules import Rule, Scope, RuleSet
|
||||
from capa.engine import FeatureSet, MatchResults
|
||||
from capa.helpers import get_file_taste
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
|
||||
from capa.helpers import (
|
||||
get_format,
|
||||
get_file_taste,
|
||||
get_auto_format,
|
||||
log_unsupported_os_error,
|
||||
log_unsupported_arch_error,
|
||||
log_unsupported_format_error,
|
||||
)
|
||||
from capa.exceptions import UnsupportedOSError, UnsupportedArchError, UnsupportedFormatError, UnsupportedRuntimeError
|
||||
from capa.features.common import (
|
||||
FORMAT_PE,
|
||||
FORMAT_ELF,
|
||||
FORMAT_AUTO,
|
||||
FORMAT_SC32,
|
||||
FORMAT_SC64,
|
||||
FORMAT_DOTNET,
|
||||
FORMAT_FREEZE,
|
||||
)
|
||||
from capa.features.extractors.base_extractor import FunctionHandle, FeatureExtractor
|
||||
|
||||
RULES_PATH_DEFAULT_STRING = "(embedded rules)"
|
||||
SIGNATURES_PATH_DEFAULT_STRING = "(embedded signatures)"
|
||||
BACKEND_VIV = "vivisect"
|
||||
BACKEND_SMDA = "smda"
|
||||
EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32")
|
||||
EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64")
|
||||
BACKEND_DOTNET = "dotnet"
|
||||
|
||||
E_MISSING_RULES = -10
|
||||
E_MISSING_FILE = -11
|
||||
@@ -287,6 +304,7 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro
|
||||
return matches, meta
|
||||
|
||||
|
||||
# TODO move all to helpers?
|
||||
def has_rule_with_namespace(rules, capabilities, rule_cat):
|
||||
for rule_name in capabilities.keys():
|
||||
if rules.rules[rule_name].meta.get("namespace", "").startswith(rule_cat):
|
||||
@@ -334,17 +352,6 @@ def is_supported_format(sample: str) -> bool:
|
||||
return len(list(capa.features.extractors.common.extract_format(taste))) == 1
|
||||
|
||||
|
||||
def get_format(sample: str) -> str:
|
||||
with open(sample, "rb") as f:
|
||||
buf = f.read()
|
||||
|
||||
for feature, _ in capa.features.extractors.common.extract_format(buf):
|
||||
assert isinstance(feature.value, str)
|
||||
return feature.value
|
||||
|
||||
return "unknown"
|
||||
|
||||
|
||||
def is_supported_arch(sample: str) -> bool:
|
||||
with open(sample, "rb") as f:
|
||||
buf = f.read()
|
||||
@@ -433,19 +440,7 @@ def get_default_signatures() -> List[str]:
|
||||
return ret
|
||||
|
||||
|
||||
class UnsupportedFormatError(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
class UnsupportedArchError(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
class UnsupportedOSError(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
def get_workspace(path, format, sigpaths):
|
||||
def get_workspace(path, format_, sigpaths):
|
||||
"""
|
||||
load the program at the given path into a vivisect workspace using the given format.
|
||||
also apply the given FLIRT signatures.
|
||||
@@ -465,21 +460,22 @@ def get_workspace(path, format, sigpaths):
|
||||
import viv_utils
|
||||
|
||||
logger.debug("generating vivisect workspace for: %s", path)
|
||||
if format == "auto":
|
||||
# TODO should not be auto at this point, anymore
|
||||
if format_ == FORMAT_AUTO:
|
||||
if not is_supported_format(path):
|
||||
raise UnsupportedFormatError()
|
||||
|
||||
# don't analyze, so that we can add our Flirt function analyzer first.
|
||||
vw = viv_utils.getWorkspace(path, analyze=False, should_save=False)
|
||||
elif format in {"pe", "elf"}:
|
||||
elif format_ in {FORMAT_PE, FORMAT_ELF}:
|
||||
vw = viv_utils.getWorkspace(path, analyze=False, should_save=False)
|
||||
elif format == "sc32":
|
||||
elif format_ == FORMAT_SC32:
|
||||
# these are not analyzed nor saved.
|
||||
vw = viv_utils.getShellcodeWorkspaceFromFile(path, arch="i386", analyze=False)
|
||||
elif format == "sc64":
|
||||
elif format_ == FORMAT_SC64:
|
||||
vw = viv_utils.getShellcodeWorkspaceFromFile(path, arch="amd64", analyze=False)
|
||||
else:
|
||||
raise ValueError("unexpected format: " + format)
|
||||
raise ValueError("unexpected format: " + format_)
|
||||
|
||||
viv_utils.flirt.register_flirt_signature_analyzers(vw, sigpaths)
|
||||
|
||||
@@ -489,12 +485,9 @@ def get_workspace(path, format, sigpaths):
|
||||
return vw
|
||||
|
||||
|
||||
class UnsupportedRuntimeError(RuntimeError):
|
||||
pass
|
||||
|
||||
|
||||
# TODO get_extractors -> List[FeatureExtractor]?
|
||||
def get_extractor(
|
||||
path: str, format: str, backend: str, sigpaths: List[str], should_save_workspace=False, disable_progress=False
|
||||
path: str, format_: str, backend: str, sigpaths: List[str], should_save_workspace=False, disable_progress=False
|
||||
) -> FeatureExtractor:
|
||||
"""
|
||||
raises:
|
||||
@@ -502,7 +495,7 @@ def get_extractor(
|
||||
UnsupportedArchError
|
||||
UnsupportedOSError
|
||||
"""
|
||||
if format not in ("sc32", "sc64"):
|
||||
if format_ not in (FORMAT_SC32, FORMAT_SC64):
|
||||
if not is_supported_format(path):
|
||||
raise UnsupportedFormatError()
|
||||
|
||||
@@ -512,6 +505,10 @@ def get_extractor(
|
||||
if not is_supported_os(path):
|
||||
raise UnsupportedOSError()
|
||||
|
||||
if format_ == FORMAT_DOTNET:
|
||||
# TODO return capa.features.extractors.dotnet.extractor.DnFeatureExtractor(...)
|
||||
raise NotImplementedError("DnFeatureExtractor")
|
||||
|
||||
if backend == "smda":
|
||||
from smda.SmdaConfig import SmdaConfig
|
||||
from smda.Disassembler import Disassembler
|
||||
@@ -530,7 +527,7 @@ def get_extractor(
|
||||
import capa.features.extractors.viv.extractor
|
||||
|
||||
with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
|
||||
vw = get_workspace(path, format, sigpaths)
|
||||
vw = get_workspace(path, format_, sigpaths)
|
||||
|
||||
if should_save_workspace:
|
||||
logger.debug("saving workspace")
|
||||
@@ -545,6 +542,22 @@ def get_extractor(
|
||||
return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path)
|
||||
|
||||
|
||||
def get_file_extractors(sample: str, format_: str) -> List[FeatureExtractor]:
    """
    build the lightweight, file-scope feature extractors for a sample.

    args:
      sample: path to the input file.
      format_: the already-resolved file format.

    returns:
      List[FeatureExtractor]: for PE, the pefile extractor, followed by the dnfile
        extractor when the file is a .NET module; for ELF, the elffile extractor;
        otherwise an empty list.
    """
    common = capa.features.extractors.common

    if format_ == common.FORMAT_PE:
        pe_extractor = capa.features.extractors.pefile.PefileFeatureExtractor(sample)

        # a PE file may additionally be a .NET module
        dn_extractor = capa.features.extractors.dnfile_.DnfileFeatureExtractor(sample)
        if dn_extractor.is_dotnet_file():
            return [pe_extractor, dn_extractor]
        return [pe_extractor]

    if format_ == common.FORMAT_ELF:
        return [capa.features.extractors.elffile.ElfFeatureExtractor(sample)]

    return []
|
||||
|
||||
|
||||
def is_nursery_rule_path(path: str) -> bool:
|
||||
"""
|
||||
The nursery is a spot for rules that have not yet been fully polished.
|
||||
@@ -652,7 +665,7 @@ def collect_metadata(argv, sample_path, rules_path, extractor):
|
||||
if rules_path != RULES_PATH_DEFAULT_STRING:
|
||||
rules_path = os.path.abspath(os.path.normpath(rules_path))
|
||||
|
||||
format = get_format(sample_path)
|
||||
format_ = get_format(sample_path)
|
||||
arch = get_arch(sample_path)
|
||||
os_ = get_os(sample_path)
|
||||
|
||||
@@ -667,7 +680,7 @@ def collect_metadata(argv, sample_path, rules_path, extractor):
|
||||
"path": os.path.normpath(sample_path),
|
||||
},
|
||||
"analysis": {
|
||||
"format": format,
|
||||
"format": format_,
|
||||
"arch": arch,
|
||||
"os": os_,
|
||||
"extractor": extractor.__class__.__name__,
|
||||
@@ -782,19 +795,20 @@ def install_common_args(parser, wanted=None):
|
||||
|
||||
if "format" in wanted:
|
||||
formats = [
|
||||
("auto", "(default) detect file type automatically"),
|
||||
("pe", "Windows PE file"),
|
||||
("elf", "Executable and Linkable Format"),
|
||||
("sc32", "32-bit shellcode"),
|
||||
("sc64", "64-bit shellcode"),
|
||||
("freeze", "features previously frozen by capa"),
|
||||
(FORMAT_AUTO, "(default) detect file type automatically"),
|
||||
(FORMAT_PE, "Windows PE file"),
|
||||
(FORMAT_DOTNET, ".NET PE file"),
|
||||
(FORMAT_ELF, "Executable and Linkable Format"),
|
||||
(FORMAT_SC32, "32-bit shellcode"),
|
||||
(FORMAT_SC64, "64-bit shellcode"),
|
||||
(FORMAT_FREEZE, "features previously frozen by capa"),
|
||||
]
|
||||
format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
|
||||
parser.add_argument(
|
||||
"-f",
|
||||
"--format",
|
||||
choices=[f[0] for f in formats],
|
||||
default="auto",
|
||||
default=FORMAT_AUTO,
|
||||
help="select sample format, %s" % format_help,
|
||||
)
|
||||
|
||||
@@ -963,13 +977,21 @@ def main(argv=None):
|
||||
return ret
|
||||
|
||||
try:
|
||||
taste = get_file_taste(args.sample)
|
||||
_ = get_file_taste(args.sample)
|
||||
except IOError as e:
|
||||
# per our research there's not a programmatic way to render the IOError with non-ASCII filename unless we
|
||||
# handle the IOError separately and reach into the args
|
||||
logger.error("%s", e.args[0])
|
||||
return E_MISSING_FILE
|
||||
|
||||
format_ = args.format
|
||||
if format_ == FORMAT_AUTO:
|
||||
try:
|
||||
format_ = get_auto_format(args.sample)
|
||||
except UnsupportedFormatError:
|
||||
log_unsupported_format_error()
|
||||
return E_INVALID_FILE_TYPE
|
||||
|
||||
try:
|
||||
rules = get_rules(args.rules, disable_progress=args.quiet)
|
||||
rules = capa.rules.RuleSet(rules)
|
||||
@@ -991,26 +1013,23 @@ def main(argv=None):
|
||||
logger.error("%s", str(e))
|
||||
return E_INVALID_RULE
|
||||
|
||||
file_extractor = None
|
||||
if args.format == "pe" or (args.format == "auto" and taste.startswith(b"MZ")):
|
||||
# these pefile and elffile file feature extractors are pretty light weight: they don't do any code analysis.
|
||||
# so we can fairly quickly determine if the given file has "pure" file-scope rules
|
||||
# that indicate a limitation (like "file is packed based on section names")
|
||||
# and avoid doing a full code analysis on difficult/impossible binaries.
|
||||
try:
|
||||
file_extractor = capa.features.extractors.pefile.PefileFeatureExtractor(args.sample)
|
||||
except PEFormatError as e:
|
||||
logger.error("Input file '%s' is not a valid PE file: %s", args.sample, str(e))
|
||||
return E_CORRUPT_FILE
|
||||
# file feature extractors are pretty lightweight: they don't do any code analysis.
|
||||
# so we can fairly quickly determine if the given file has "pure" file-scope rules
|
||||
# that indicate a limitation (like "file is packed based on section names")
|
||||
# and avoid doing a full code analysis on difficult/impossible binaries.
|
||||
#
|
||||
# this pass can inspect multiple file extractors, e.g., dotnet and pe to identify
|
||||
# various limitations
|
||||
try:
|
||||
file_extractors = get_file_extractors(args.sample, format_)
|
||||
except PEFormatError as e:
|
||||
logger.error("Input file '%s' is not a valid PE file: %s", args.sample, str(e))
|
||||
return E_CORRUPT_FILE
|
||||
except (ELFError, OverflowError) as e:
|
||||
logger.error("Input file '%s' is not a valid ELF file: %s", args.sample, str(e))
|
||||
return E_CORRUPT_FILE
|
||||
|
||||
elif args.format == "elf" or (args.format == "auto" and taste.startswith(b"\x7fELF")):
|
||||
try:
|
||||
file_extractor = capa.features.extractors.elffile.ElfFeatureExtractor(args.sample)
|
||||
except (ELFError, OverflowError) as e:
|
||||
logger.error("Input file '%s' is not a valid ELF file: %s", args.sample, str(e))
|
||||
return E_CORRUPT_FILE
|
||||
|
||||
if file_extractor:
|
||||
for file_extractor in file_extractors:
|
||||
try:
|
||||
pure_file_capabilities, _ = find_file_capabilities(rules, file_extractor, {})
|
||||
except PEFormatError as e:
|
||||
@@ -1029,58 +1048,37 @@ def main(argv=None):
|
||||
logger.debug("file limitation short circuit, won't analyze fully.")
|
||||
return E_FILE_LIMITATION
|
||||
|
||||
try:
|
||||
if args.format == "pe" or (args.format == "auto" and taste.startswith(b"MZ")):
|
||||
sig_paths = get_signatures(args.signatures)
|
||||
else:
|
||||
sig_paths = []
|
||||
logger.debug("skipping library code matching: only have PE signatures")
|
||||
except (IOError) as e:
|
||||
logger.error("%s", str(e))
|
||||
return E_INVALID_SIG
|
||||
if isinstance(file_extractor, capa.features.extractors.dnfile_.DnfileFeatureExtractor):
|
||||
format_ = FORMAT_DOTNET
|
||||
|
||||
if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)):
|
||||
format = "freeze"
|
||||
if format_ == FORMAT_FREEZE:
|
||||
with open(args.sample, "rb") as f:
|
||||
extractor = capa.features.freeze.load(f.read())
|
||||
else:
|
||||
format = args.format
|
||||
if format == "auto" and args.sample.endswith(EXTENSIONS_SHELLCODE_32):
|
||||
format = "sc32"
|
||||
elif format == "auto" and args.sample.endswith(EXTENSIONS_SHELLCODE_64):
|
||||
format = "sc64"
|
||||
try:
|
||||
if format_ == FORMAT_PE:
|
||||
sig_paths = get_signatures(args.signatures)
|
||||
else:
|
||||
sig_paths = []
|
||||
logger.debug("skipping library code matching: only have native PE signatures")
|
||||
except IOError as e:
|
||||
logger.error("%s", str(e))
|
||||
return E_INVALID_SIG
|
||||
|
||||
should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
|
||||
|
||||
try:
|
||||
extractor = get_extractor(
|
||||
args.sample, format, args.backend, sig_paths, should_save_workspace, disable_progress=args.quiet
|
||||
args.sample, format_, args.backend, sig_paths, should_save_workspace, disable_progress=args.quiet
|
||||
)
|
||||
except UnsupportedFormatError:
|
||||
logger.error("-" * 80)
|
||||
logger.error(" Input file does not appear to be a PE or ELF file.")
|
||||
logger.error(" ")
|
||||
logger.error(
|
||||
" capa currently only supports analyzing PE and ELF files (or shellcode, when using --format sc32|sc64)."
|
||||
)
|
||||
logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
|
||||
logger.error("-" * 80)
|
||||
log_unsupported_format_error()
|
||||
return E_INVALID_FILE_TYPE
|
||||
except UnsupportedArchError:
|
||||
logger.error("-" * 80)
|
||||
logger.error(" Input file does not appear to target a supported architecture.")
|
||||
logger.error(" ")
|
||||
logger.error(" capa currently only supports analyzing x86 (32- and 64-bit).")
|
||||
logger.error("-" * 80)
|
||||
log_unsupported_arch_error()
|
||||
return E_INVALID_FILE_ARCH
|
||||
except UnsupportedOSError:
|
||||
logger.error("-" * 80)
|
||||
logger.error(" Input file does not appear to target a supported OS.")
|
||||
logger.error(" ")
|
||||
logger.error(
|
||||
" capa currently only supports analyzing executables for some operating systems (including Windows and Linux)."
|
||||
)
|
||||
logger.error("-" * 80)
|
||||
log_unsupported_os_error()
|
||||
return E_INVALID_FILE_OS
|
||||
|
||||
meta = collect_metadata(argv, args.sample, args.rules, extractor)
|
||||
|
||||
@@ -7,9 +7,9 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import json
|
||||
|
||||
import capa.render.result_document
|
||||
from capa.rules import RuleSet
|
||||
from capa.engine import MatchResults
|
||||
from capa.render.result_document import convert_capabilities_to_result_document
|
||||
|
||||
|
||||
class CapaJsonObjectEncoder(json.JSONEncoder):
|
||||
@@ -27,7 +27,7 @@ class CapaJsonObjectEncoder(json.JSONEncoder):
|
||||
|
||||
def render(meta, rules: RuleSet, capabilities: MatchResults) -> str:
|
||||
return json.dumps(
|
||||
capa.render.result_document.convert_capabilities_to_result_document(meta, rules, capabilities),
|
||||
convert_capabilities_to_result_document(meta, rules, capabilities),
|
||||
cls=CapaJsonObjectEncoder,
|
||||
sort_keys=True,
|
||||
)
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import copy
|
||||
|
||||
import capa.rules
|
||||
import capa.engine
|
||||
import capa.render.utils
|
||||
import capa.features.common
|
||||
|
||||
@@ -41,6 +41,7 @@ import tqdm.contrib.logging
|
||||
import capa.main
|
||||
import capa.rules
|
||||
import capa.engine
|
||||
import capa.helpers
|
||||
import capa.features.insn
|
||||
import capa.features.common
|
||||
from capa.rules import Rule, RuleSet
|
||||
@@ -286,16 +287,16 @@ def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]:
|
||||
logger.debug("found cached results: %s: %d capabilities", nice_path, len(ctx.capabilities_by_sample[path]))
|
||||
return ctx.capabilities_by_sample[path]
|
||||
|
||||
if nice_path.endswith(capa.main.EXTENSIONS_SHELLCODE_32):
|
||||
format = "sc32"
|
||||
elif nice_path.endswith(capa.main.EXTENSIONS_SHELLCODE_64):
|
||||
format = "sc64"
|
||||
if nice_path.endswith(capa.helpers.EXTENSIONS_SHELLCODE_32):
|
||||
format_ = "sc32"
|
||||
elif nice_path.endswith(capa.helpers.EXTENSIONS_SHELLCODE_64):
|
||||
format_ = "sc64"
|
||||
else:
|
||||
format = "auto"
|
||||
format_ = "auto"
|
||||
|
||||
logger.debug("analyzing sample: %s", nice_path)
|
||||
extractor = capa.main.get_extractor(
|
||||
nice_path, format, capa.main.BACKEND_VIV, DEFAULT_SIGNATURES, False, disable_progress=True
|
||||
nice_path, format_, capa.main.BACKEND_VIV, DEFAULT_SIGNATURES, False, disable_progress=True
|
||||
)
|
||||
|
||||
capabilities, _ = capa.main.find_capabilities(ctx.rules, extractor, disable_progress=True)
|
||||
|
||||
@@ -59,7 +59,9 @@ import colorama
|
||||
import capa.main
|
||||
import capa.rules
|
||||
import capa.engine
|
||||
import capa.helpers
|
||||
import capa.features
|
||||
import capa.exceptions
|
||||
import capa.render.utils as rutils
|
||||
import capa.features.freeze
|
||||
import capa.render.result_document
|
||||
@@ -162,25 +164,11 @@ def main(argv=None):
|
||||
extractor = capa.main.get_extractor(
|
||||
args.sample, args.format, args.backend, sig_paths, should_save_workspace
|
||||
)
|
||||
except capa.main.UnsupportedFormatError:
|
||||
logger.error("-" * 80)
|
||||
logger.error(" Input file does not appear to be a PE file.")
|
||||
logger.error(" ")
|
||||
logger.error(
|
||||
" capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)."
|
||||
)
|
||||
logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
|
||||
logger.error("-" * 80)
|
||||
except capa.exceptions.UnsupportedFormatError:
|
||||
capa.helpers.log_unsupported_format_error()
|
||||
return -1
|
||||
except capa.main.UnsupportedRuntimeError:
|
||||
logger.error("-" * 80)
|
||||
logger.error(" Unsupported runtime or Python interpreter.")
|
||||
logger.error(" ")
|
||||
logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.")
|
||||
logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.")
|
||||
logger.error(" ")
|
||||
logger.error(" If you're seeing this message on the command line, please ensure you're running Python 2.7.")
|
||||
logger.error("-" * 80)
|
||||
except capa.exceptions.UnsupportedRuntimeError:
|
||||
capa.helpers.log_unsupported_runtime_error()
|
||||
return -1
|
||||
|
||||
meta = capa.main.collect_metadata(argv, args.sample, args.rules, extractor)
|
||||
|
||||
@@ -75,8 +75,10 @@ import capa.rules
|
||||
import capa.engine
|
||||
import capa.helpers
|
||||
import capa.features
|
||||
import capa.exceptions
|
||||
import capa.features.common
|
||||
import capa.features.freeze
|
||||
from capa.helpers import log_unsupported_runtime_error
|
||||
|
||||
logger = logging.getLogger("capa.show-features")
|
||||
|
||||
@@ -113,25 +115,11 @@ def main(argv=None):
|
||||
extractor = capa.main.get_extractor(
|
||||
args.sample, args.format, args.backend, sig_paths, should_save_workspace
|
||||
)
|
||||
except capa.main.UnsupportedFormatError:
|
||||
logger.error("-" * 80)
|
||||
logger.error(" Input file does not appear to be a PE file.")
|
||||
logger.error(" ")
|
||||
logger.error(
|
||||
" capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)."
|
||||
)
|
||||
logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
|
||||
logger.error("-" * 80)
|
||||
except capa.exceptions.UnsupportedFormatError:
|
||||
capa.helpers.log_unsupported_format_error()
|
||||
return -1
|
||||
except capa.main.UnsupportedRuntimeError:
|
||||
logger.error("-" * 80)
|
||||
logger.error(" Unsupported runtime or Python interpreter.")
|
||||
logger.error(" ")
|
||||
logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.")
|
||||
logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.")
|
||||
logger.error(" ")
|
||||
logger.error(" If you're seeing this message on the command line, please ensure you're running Python 2.7.")
|
||||
logger.error("-" * 80)
|
||||
except capa.exceptions.UnsupportedRuntimeError:
|
||||
log_unsupported_runtime_error()
|
||||
return -1
|
||||
|
||||
if not args.function:
|
||||
|
||||
1
setup.py
1
setup.py
@@ -26,6 +26,7 @@ requirements = [
|
||||
"smda==1.7.1",
|
||||
"pefile==2021.9.3",
|
||||
"pyelftools==0.28",
|
||||
"dnfile==0.10.0",
|
||||
]
|
||||
|
||||
# this sets __version__
|
||||
|
||||
@@ -224,6 +224,8 @@ def get_data_path_by_name(name):
|
||||
return os.path.join(CD, "data", "79abd17391adc6251ecdc58d13d76baf.dll_")
|
||||
elif name.startswith("946a9"):
|
||||
return os.path.join(CD, "data", "946a99f36a46d335dec080d9a4371940.dll_")
|
||||
elif name.startswith("b9f5b"):
|
||||
return os.path.join(CD, "data", "b9f5bd514485fb06da39beff051b9fdc.exe_")
|
||||
else:
|
||||
raise ValueError("unexpected sample fixture: %s" % name)
|
||||
|
||||
@@ -276,7 +278,9 @@ def get_sample_md5_by_name(name):
|
||||
elif name.startswith("79abd"):
|
||||
return "79abd17391adc6251ecdc58d13d76baf"
|
||||
elif name.startswith("946a9"):
|
||||
return "946a99f36a46d335dec080d9a4371940.dll_"
|
||||
return "946a99f36a46d335dec080d9a4371940"
|
||||
elif name.startswith("b9f5b"):
|
||||
return "b9f5bd514485fb06da39beff051b9fdc"
|
||||
else:
|
||||
raise ValueError("unexpected sample fixture: %s" % name)
|
||||
|
||||
@@ -583,6 +587,16 @@ FEATURE_PRESENCE_TESTS = sorted(
|
||||
key=lambda t: (t[0], t[1]),
|
||||
)
|
||||
|
||||
# Feature-presence expectations for the .NET sample: each entry is a
# (sample, scope, feature, expected) tuple.
FEATURE_PRESENCE_TESTS_DOTNET = sorted(
    [
        ("b9f5b", "file", Arch(ARCH_I386), True),
        ("b9f5b", "file", Arch(ARCH_AMD64), False),
    ],
    # sort on the leading (file, item) pair of each entry
    # so that our LRU cache is most effective.
    key=lambda case: case[:2],
)
|
||||
|
||||
FEATURE_PRESENCE_TESTS_IDA = [
|
||||
# file/imports
|
||||
# IDA can recover more names of APIs imported by ordinal
|
||||
@@ -695,3 +709,8 @@ def al_khaser_x86_extractor():
|
||||
@pytest.fixture
def pingtaest_extractor():
    """Return an extractor built from the "pingtaest" sample fixture."""
    sample_path = get_data_path_by_name("pingtaest")
    return get_extractor(sample_path)
|
||||
|
||||
|
||||
@pytest.fixture
def b9f5b_extractor():
    """Return an extractor built from the "b9f5b" sample fixture."""
    sample_path = get_data_path_by_name("b9f5b")
    return get_extractor(sample_path)
|
||||
|
||||
25
tests/test_dotnet_features.py
Normal file
25
tests/test_dotnet_features.py
Normal file
@@ -0,0 +1,25 @@
|
||||
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
# b9f5bd514485fb06da39beff051b9fdc
|
||||
|
||||
import pytest
|
||||
import fixtures
|
||||
from fixtures import *
|
||||
from fixtures import parametrize
|
||||
|
||||
import capa.features.file
|
||||
|
||||
|
||||
@parametrize(
    "sample,scope,feature,expected",
    fixtures.FEATURE_PRESENCE_TESTS_DOTNET,
    indirect=["sample", "scope"],
)
def test_dnfile_features(sample, scope, feature, expected):
    """Check each .NET feature-presence case against the extracted features.

    Every case in ``fixtures.FEATURE_PRESENCE_TESTS_DOTNET`` is forwarded to
    ``fixtures.do_test_feature_presence`` together with the extractor factory.
    """
    # NOTE(review): the test is named for dnfile but passes the pefile
    # extractor factory - confirm this is the intended extractor.
    extractor_factory = fixtures.get_pefile_extractor
    fixtures.do_test_feature_presence(extractor_factory, sample, scope, feature, expected)
|
||||
Reference in New Issue
Block a user