add format to global features and code refactors (#1284)

* refactor: get format handling

* add format to global features
This commit is contained in:
Moritz
2023-01-19 13:31:00 +01:00
committed by GitHub
parent 0fb3be359f
commit fa0ddba436
9 changed files with 32 additions and 20 deletions

View File

@@ -91,6 +91,7 @@
- show-features: better render strings with embedded whitespace #1267 @williballenthin
- handle vivisect bug around strings at instruction level, use min length 4 #1271 @williballenthin @mr-tz
- extractor: guard against invalid "calls from" features #1177 @mr-tz
- extractor: add format to global features #1258 @mr-tz
### capa explorer IDA Pro plugin
- fix: display instruction items #1154 @mr-tz

View File

@@ -8,13 +8,13 @@
from __future__ import annotations
from enum import Enum
from typing import Dict, List, Tuple, Union, Iterator, Optional
import dnfile
from dncil.cil.opcode import OpCodes
import capa.features.extractors
import capa.features.extractors.dotnetfile
import capa.features.extractors.dnfile.file
import capa.features.extractors.dnfile.insn
import capa.features.extractors.dnfile.function
@@ -78,6 +78,7 @@ class DnfileFeatureExtractor(FeatureExtractor):
# pre-compute these because we'll yield them at *every* scope.
self.global_features: List[Tuple[Feature, Address]] = []
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_format())
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe))
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe))

View File

@@ -25,6 +25,7 @@ class IdaFeatureExtractor(FeatureExtractor):
def __init__(self):
super().__init__()
self.global_features: List[Tuple[Feature, Address]] = []
self.global_features.extend(capa.features.extractors.ida.file.extract_file_format())
self.global_features.extend(capa.features.extractors.ida.global_.extract_os())
self.global_features.extend(capa.features.extractors.ida.global_.extract_arch())

View File

@@ -34,6 +34,7 @@ class VivisectFeatureExtractor(FeatureExtractor):
# pre-compute these because we'll yield them at *every* scope.
self.global_features: List[Tuple[Feature, Address]] = []
self.global_features.extend(capa.features.extractors.viv.file.extract_file_format(self.buf))
self.global_features.extend(capa.features.extractors.common.extract_os(self.buf))
self.global_features.extend(capa.features.extractors.viv.global_.extract_arch(self.vw))

View File

@@ -10,7 +10,7 @@ import logging
from typing import NoReturn
from capa.exceptions import UnsupportedFormatError
from capa.features.common import FORMAT_SC32, FORMAT_SC64, FORMAT_UNKNOWN
from capa.features.common import FORMAT_PE, FORMAT_SC32, FORMAT_SC64, FORMAT_DOTNET, FORMAT_UNKNOWN, Format
EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32")
EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64")
@@ -68,11 +68,17 @@ def get_auto_format(path: str) -> str:
def get_format(sample: str) -> str:
# imported locally to avoid import cycle
from capa.features.extractors.common import extract_format
from capa.features.extractors.dnfile_ import DnfileFeatureExtractor
with open(sample, "rb") as f:
buf = f.read()
for feature, _ in extract_format(buf):
if feature == Format(FORMAT_PE):
dnfile_extractor = DnfileFeatureExtractor(sample)
if dnfile_extractor.is_dotnet_file():
feature = Format(FORMAT_DOTNET)
assert isinstance(feature.value, str)
return feature.value

View File

@@ -20,7 +20,7 @@ import textwrap
import itertools
import contextlib
import collections
from typing import Any, Dict, List, Tuple, Optional
from typing import Any, Dict, List, Tuple
import halo
import tqdm
@@ -535,12 +535,12 @@ def get_extractor(
def get_file_extractors(sample: str, format_: str) -> List[FeatureExtractor]:
file_extractors: List[FeatureExtractor] = list()
if format_ == capa.features.extractors.common.FORMAT_PE:
if format_ == FORMAT_PE:
file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample))
dnfile_extractor = capa.features.extractors.dnfile_.DnfileFeatureExtractor(sample)
if dnfile_extractor.is_dotnet_file():
file_extractors.append(dnfile_extractor)
elif format_ == FORMAT_DOTNET:
file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample))
file_extractors.append(capa.features.extractors.dnfile_.DnfileFeatureExtractor(sample))
elif format_ == capa.features.extractors.common.FORMAT_ELF:
file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(sample))
@@ -646,7 +646,6 @@ def collect_metadata(
sample_path: str,
rules_path: List[str],
extractor: capa.features.extractors.base_extractor.FeatureExtractor,
format_: Optional[str] = None,
):
md5 = hashlib.md5()
sha1 = hashlib.sha1()
@@ -662,8 +661,7 @@ def collect_metadata(
if rules_path != [RULES_PATH_DEFAULT_STRING]:
rules_path = [os.path.abspath(os.path.normpath(r)) for r in rules_path]
if format_ is None:
format_ = get_format(sample_path)
format_ = get_format(sample_path)
arch = get_arch(sample_path)
os_ = get_os(sample_path)
@@ -996,6 +994,9 @@ def main(argv=None):
if format_ == FORMAT_AUTO:
try:
format_ = get_auto_format(args.sample)
except PEFormatError as e:
logger.error("Input file '%s' is not a valid PE file: %s", args.sample, str(e))
return E_CORRUPT_FILE
except UnsupportedFormatError:
log_unsupported_format_error()
return E_INVALID_FILE_TYPE
@@ -1058,9 +1059,6 @@ def main(argv=None):
logger.error("Input file '%s' is not a valid ELF file: %s", args.sample, str(e))
return E_CORRUPT_FILE
if isinstance(file_extractor, capa.features.extractors.dnfile_.DnfileFeatureExtractor):
format_ = FORMAT_DOTNET
# file limitations that rely on non-file scope won't be detected here.
# nor on FunctionName features, because pefile doesn't support this.
if has_file_limitation(rules, pure_file_capabilities):
@@ -1100,7 +1098,7 @@ def main(argv=None):
log_unsupported_os_error()
return E_INVALID_FILE_OS
meta = collect_metadata(argv, args.sample, args.rules, extractor, format_=format_)
meta = collect_metadata(argv, args.sample, args.rules, extractor)
capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)
meta["analysis"].update(counts)

View File

@@ -307,11 +307,7 @@ def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]:
elif nice_path.endswith(capa.helpers.EXTENSIONS_SHELLCODE_64):
format_ = "sc64"
else:
format_ = "auto"
if not nice_path.endswith(capa.helpers.EXTENSIONS_ELF):
dnfile_extractor = capa.features.extractors.dnfile_.DnfileFeatureExtractor(nice_path)
if dnfile_extractor.is_dotnet_file():
format_ = FORMAT_DOTNET
format_ = capa.main.get_auto_format(nice_path)
logger.debug("analyzing sample: %s", nice_path)
extractor = capa.main.get_extractor(nice_path, format_, "", DEFAULT_SIGNATURES, False, disable_progress=True)

View File

@@ -175,7 +175,7 @@ def main(argv=None):
capa.helpers.log_unsupported_runtime_error()
return -1
meta = capa.main.collect_metadata(argv, args.sample, args.rules, extractor, format_=format_)
meta = capa.main.collect_metadata(argv, args.sample, args.rules, extractor)
capabilities, counts = capa.main.find_capabilities(rules, extractor)
meta["analysis"].update(counts)
meta["analysis"]["layout"] = capa.main.compute_layout(rules, extractor, capabilities)

View File

@@ -689,14 +689,22 @@ FEATURE_PRESENCE_TESTS = sorted(
# os & format & arch
("pma16-01", "file", OS(OS_WINDOWS), True),
("pma16-01", "file", OS(OS_LINUX), False),
("mimikatz", "file", OS(OS_WINDOWS), True),
("pma16-01", "function=0x404356", OS(OS_WINDOWS), True),
("pma16-01", "function=0x404356,bb=0x4043B9", OS(OS_WINDOWS), True),
("mimikatz", "function=0x40105D", OS(OS_WINDOWS), True),
("pma16-01", "file", Arch(ARCH_I386), True),
("pma16-01", "file", Arch(ARCH_AMD64), False),
("mimikatz", "file", Arch(ARCH_I386), True),
("pma16-01", "function=0x404356", Arch(ARCH_I386), True),
("pma16-01", "function=0x404356,bb=0x4043B9", Arch(ARCH_I386), True),
("mimikatz", "function=0x40105D", Arch(ARCH_I386), True),
("pma16-01", "file", Format(FORMAT_PE), True),
("pma16-01", "file", Format(FORMAT_ELF), False),
("mimikatz", "file", Format(FORMAT_PE), True),
# format is also a global feature
("pma16-01", "function=0x404356", Format(FORMAT_PE), True),
("mimikatz", "function=0x456BB9", Format(FORMAT_PE), True),
# elf support
("7351f.elf", "file", OS(OS_LINUX), True),
("7351f.elf", "file", OS(OS_WINDOWS), False),