mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 23:59:48 -08:00
add format to global features and code refactors (#1284)
* refactor: get format handling
* add format to global features
This commit is contained in:
@@ -91,6 +91,7 @@
|
||||
- show-features: better render strings with embedded whitespace #1267 @williballenthin
|
||||
- handle vivisect bug around strings at instruction level, use min length 4 #1271 @williballenthin @mr-tz
|
||||
- extractor: guard against invalid "calls from" features #1177 @mr-tz
|
||||
- extractor: add format to global features #1258 @mr-tz
|
||||
|
||||
### capa explorer IDA Pro plugin
|
||||
- fix: display instruction items #1154 @mr-tz
|
||||
|
||||
@@ -8,13 +8,13 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
from typing import Dict, List, Tuple, Union, Iterator, Optional
|
||||
|
||||
import dnfile
|
||||
from dncil.cil.opcode import OpCodes
|
||||
|
||||
import capa.features.extractors
|
||||
import capa.features.extractors.dotnetfile
|
||||
import capa.features.extractors.dnfile.file
|
||||
import capa.features.extractors.dnfile.insn
|
||||
import capa.features.extractors.dnfile.function
|
||||
@@ -78,6 +78,7 @@ class DnfileFeatureExtractor(FeatureExtractor):
|
||||
|
||||
# pre-compute these because we'll yield them at *every* scope.
|
||||
self.global_features: List[Tuple[Feature, Address]] = []
|
||||
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_format())
|
||||
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe))
|
||||
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe))
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ class IdaFeatureExtractor(FeatureExtractor):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.global_features: List[Tuple[Feature, Address]] = []
|
||||
self.global_features.extend(capa.features.extractors.ida.file.extract_file_format())
|
||||
self.global_features.extend(capa.features.extractors.ida.global_.extract_os())
|
||||
self.global_features.extend(capa.features.extractors.ida.global_.extract_arch())
|
||||
|
||||
|
||||
@@ -34,6 +34,7 @@ class VivisectFeatureExtractor(FeatureExtractor):
|
||||
|
||||
# pre-compute these because we'll yield them at *every* scope.
|
||||
self.global_features: List[Tuple[Feature, Address]] = []
|
||||
self.global_features.extend(capa.features.extractors.viv.file.extract_file_format(self.buf))
|
||||
self.global_features.extend(capa.features.extractors.common.extract_os(self.buf))
|
||||
self.global_features.extend(capa.features.extractors.viv.global_.extract_arch(self.vw))
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ import logging
|
||||
from typing import NoReturn
|
||||
|
||||
from capa.exceptions import UnsupportedFormatError
|
||||
from capa.features.common import FORMAT_SC32, FORMAT_SC64, FORMAT_UNKNOWN
|
||||
from capa.features.common import FORMAT_PE, FORMAT_SC32, FORMAT_SC64, FORMAT_DOTNET, FORMAT_UNKNOWN, Format
|
||||
|
||||
EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32")
|
||||
EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64")
|
||||
@@ -68,11 +68,17 @@ def get_auto_format(path: str) -> str:
|
||||
def get_format(sample: str) -> str:
|
||||
# imported locally to avoid import cycle
|
||||
from capa.features.extractors.common import extract_format
|
||||
from capa.features.extractors.dnfile_ import DnfileFeatureExtractor
|
||||
|
||||
with open(sample, "rb") as f:
|
||||
buf = f.read()
|
||||
|
||||
for feature, _ in extract_format(buf):
|
||||
if feature == Format(FORMAT_PE):
|
||||
dnfile_extractor = DnfileFeatureExtractor(sample)
|
||||
if dnfile_extractor.is_dotnet_file():
|
||||
feature = Format(FORMAT_DOTNET)
|
||||
|
||||
assert isinstance(feature.value, str)
|
||||
return feature.value
|
||||
|
||||
|
||||
20
capa/main.py
20
capa/main.py
@@ -20,7 +20,7 @@ import textwrap
|
||||
import itertools
|
||||
import contextlib
|
||||
import collections
|
||||
from typing import Any, Dict, List, Tuple, Optional
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
import halo
|
||||
import tqdm
|
||||
@@ -535,12 +535,12 @@ def get_extractor(
|
||||
def get_file_extractors(sample: str, format_: str) -> List[FeatureExtractor]:
|
||||
file_extractors: List[FeatureExtractor] = list()
|
||||
|
||||
if format_ == capa.features.extractors.common.FORMAT_PE:
|
||||
if format_ == FORMAT_PE:
|
||||
file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample))
|
||||
|
||||
dnfile_extractor = capa.features.extractors.dnfile_.DnfileFeatureExtractor(sample)
|
||||
if dnfile_extractor.is_dotnet_file():
|
||||
file_extractors.append(dnfile_extractor)
|
||||
elif format_ == FORMAT_DOTNET:
|
||||
file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample))
|
||||
file_extractors.append(capa.features.extractors.dnfile_.DnfileFeatureExtractor(sample))
|
||||
|
||||
elif format_ == capa.features.extractors.common.FORMAT_ELF:
|
||||
file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(sample))
|
||||
@@ -646,7 +646,6 @@ def collect_metadata(
|
||||
sample_path: str,
|
||||
rules_path: List[str],
|
||||
extractor: capa.features.extractors.base_extractor.FeatureExtractor,
|
||||
format_: Optional[str] = None,
|
||||
):
|
||||
md5 = hashlib.md5()
|
||||
sha1 = hashlib.sha1()
|
||||
@@ -662,7 +661,6 @@ def collect_metadata(
|
||||
if rules_path != [RULES_PATH_DEFAULT_STRING]:
|
||||
rules_path = [os.path.abspath(os.path.normpath(r)) for r in rules_path]
|
||||
|
||||
if format_ is None:
|
||||
format_ = get_format(sample_path)
|
||||
arch = get_arch(sample_path)
|
||||
os_ = get_os(sample_path)
|
||||
@@ -996,6 +994,9 @@ def main(argv=None):
|
||||
if format_ == FORMAT_AUTO:
|
||||
try:
|
||||
format_ = get_auto_format(args.sample)
|
||||
except PEFormatError as e:
|
||||
logger.error("Input file '%s' is not a valid PE file: %s", args.sample, str(e))
|
||||
return E_CORRUPT_FILE
|
||||
except UnsupportedFormatError:
|
||||
log_unsupported_format_error()
|
||||
return E_INVALID_FILE_TYPE
|
||||
@@ -1058,9 +1059,6 @@ def main(argv=None):
|
||||
logger.error("Input file '%s' is not a valid ELF file: %s", args.sample, str(e))
|
||||
return E_CORRUPT_FILE
|
||||
|
||||
if isinstance(file_extractor, capa.features.extractors.dnfile_.DnfileFeatureExtractor):
|
||||
format_ = FORMAT_DOTNET
|
||||
|
||||
# file limitations that rely on non-file scope won't be detected here.
|
||||
# nor on FunctionName features, because pefile doesn't support this.
|
||||
if has_file_limitation(rules, pure_file_capabilities):
|
||||
@@ -1100,7 +1098,7 @@ def main(argv=None):
|
||||
log_unsupported_os_error()
|
||||
return E_INVALID_FILE_OS
|
||||
|
||||
meta = collect_metadata(argv, args.sample, args.rules, extractor, format_=format_)
|
||||
meta = collect_metadata(argv, args.sample, args.rules, extractor)
|
||||
|
||||
capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)
|
||||
meta["analysis"].update(counts)
|
||||
|
||||
@@ -307,11 +307,7 @@ def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]:
|
||||
elif nice_path.endswith(capa.helpers.EXTENSIONS_SHELLCODE_64):
|
||||
format_ = "sc64"
|
||||
else:
|
||||
format_ = "auto"
|
||||
if not nice_path.endswith(capa.helpers.EXTENSIONS_ELF):
|
||||
dnfile_extractor = capa.features.extractors.dnfile_.DnfileFeatureExtractor(nice_path)
|
||||
if dnfile_extractor.is_dotnet_file():
|
||||
format_ = FORMAT_DOTNET
|
||||
format_ = capa.main.get_auto_format(nice_path)
|
||||
|
||||
logger.debug("analyzing sample: %s", nice_path)
|
||||
extractor = capa.main.get_extractor(nice_path, format_, "", DEFAULT_SIGNATURES, False, disable_progress=True)
|
||||
|
||||
@@ -175,7 +175,7 @@ def main(argv=None):
|
||||
capa.helpers.log_unsupported_runtime_error()
|
||||
return -1
|
||||
|
||||
meta = capa.main.collect_metadata(argv, args.sample, args.rules, extractor, format_=format_)
|
||||
meta = capa.main.collect_metadata(argv, args.sample, args.rules, extractor)
|
||||
capabilities, counts = capa.main.find_capabilities(rules, extractor)
|
||||
meta["analysis"].update(counts)
|
||||
meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities)
|
||||
|
||||
@@ -689,14 +689,22 @@ FEATURE_PRESENCE_TESTS = sorted(
|
||||
# os & format & arch
|
||||
("pma16-01", "file", OS(OS_WINDOWS), True),
|
||||
("pma16-01", "file", OS(OS_LINUX), False),
|
||||
("mimikatz", "file", OS(OS_WINDOWS), True),
|
||||
("pma16-01", "function=0x404356", OS(OS_WINDOWS), True),
|
||||
("pma16-01", "function=0x404356,bb=0x4043B9", OS(OS_WINDOWS), True),
|
||||
("mimikatz", "function=0x40105D", OS(OS_WINDOWS), True),
|
||||
("pma16-01", "file", Arch(ARCH_I386), True),
|
||||
("pma16-01", "file", Arch(ARCH_AMD64), False),
|
||||
("mimikatz", "file", Arch(ARCH_I386), True),
|
||||
("pma16-01", "function=0x404356", Arch(ARCH_I386), True),
|
||||
("pma16-01", "function=0x404356,bb=0x4043B9", Arch(ARCH_I386), True),
|
||||
("mimikatz", "function=0x40105D", Arch(ARCH_I386), True),
|
||||
("pma16-01", "file", Format(FORMAT_PE), True),
|
||||
("pma16-01", "file", Format(FORMAT_ELF), False),
|
||||
("mimikatz", "file", Format(FORMAT_PE), True),
|
||||
# format is also a global feature
|
||||
("pma16-01", "function=0x404356", Format(FORMAT_PE), True),
|
||||
("mimikatz", "function=0x456BB9", Format(FORMAT_PE), True),
|
||||
# elf support
|
||||
("7351f.elf", "file", OS(OS_LINUX), True),
|
||||
("7351f.elf", "file", OS(OS_WINDOWS), False),
|
||||
|
||||
Reference in New Issue
Block a user