feat: start dotnet detection (#955)

* feat: start dotnet detection

* Apply suggestions from code review

Co-authored-by: Willi Ballenthin <willi.ballenthin@gmail.com>

* refactor: dn instead of dotnet

* refactor: format branches, extractor reorg

* refactor: format selection and dotnet detect

* feat: get format, arch, os

* refactor: log errors and exceptions

* ci: also test and build for dotnet-main dev

* fix: import path

* fix: circular dep

* fix: remove buf argument
feat: get runtime meta data

* fix: log unsupported runtime error

* fix: type ignore

Co-authored-by: Willi Ballenthin <willi.ballenthin@gmail.com>
This commit is contained in:
Moritz
2022-04-06 11:24:05 +02:00
committed by Moritz Raabe
parent de312d87dc
commit b5be876e61
18 changed files with 399 additions and 167 deletions

View File

@@ -2,7 +2,7 @@ name: build
on:
push:
branches: [master]
branches: [master, dotnet-main]
release:
types: [edited, published]

View File

@@ -2,9 +2,9 @@ name: CI
on:
push:
branches: [ master ]
branches: [ master, dotnet-main ]
pull_request:
branches: [ master ]
branches: [ master, dotnet-main ]
# save workspaces to speed up testing
env:

View File

@@ -4,14 +4,15 @@
### New Features
- add new scope "instruction" for matching mnemonics and operands #767 @williballenthin
- add new feature "operand[{0, 1, 2}].number" for matching instruction operand immediate values #767 @williballenthin
- add new feature "operand[{0, 1, 2}].offset" for matching instruction operand offsets #767 @williballenthin
- add new scope "instruction" for matching mnemonics and operands #767 @williballenthin
- add new feature "operand[{0, 1, 2}].number" for matching instruction operand immediate values #767 @williballenthin
- add new feature "operand[{0, 1, 2}].offset" for matching instruction operand offsets #767 @williballenthin
- main: detect dotnet binaries #955 @mr-tz
### Breaking Changes
- instruction scope and operand feature are new and are not backwards compatible with older versions of capa
- Python 3.7 is now the minimum supported Python version #866 @williballenthin
- instruction scope and operand feature are new and are not backwards compatible with older versions of capa
- Python 3.7 is now the minimum supported Python version #866 @williballenthin
### New Rules (4)

14
capa/exceptions.py Normal file
View File

@@ -0,0 +1,14 @@
class UnsupportedRuntimeError(RuntimeError):
    """Signals that capa is running under an unsupported runtime or interpreter."""
class UnsupportedFormatError(ValueError):
    """Signals that the input file is not in a format capa can analyze."""
class UnsupportedArchError(ValueError):
    """Signals that the input file targets an architecture capa cannot analyze."""
class UnsupportedOSError(ValueError):
    """Signals that the input file targets an operating system capa cannot analyze."""

View File

@@ -410,7 +410,9 @@ VALID_BITNESS = (BITNESS_X32, BITNESS_X64)
# other candidates here: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types
ARCH_I386 = "i386"
ARCH_AMD64 = "amd64"
VALID_ARCH = (ARCH_I386, ARCH_AMD64)
# dotnet
ARCH_ANY = "any"
VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY)
class Arch(Feature):
@@ -422,8 +424,10 @@ class Arch(Feature):
OS_WINDOWS = "windows"
OS_LINUX = "linux"
OS_MACOS = "macos"
# dotnet
OS_ANY = "any"
VALID_OS = {os.value for os in capa.features.extractors.elf.OS}
VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS})
VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS, OS_ANY})
class OS(Feature):
@@ -434,7 +438,14 @@ class OS(Feature):
FORMAT_PE = "pe"
FORMAT_ELF = "elf"
VALID_FORMAT = (FORMAT_PE, FORMAT_ELF)
FORMAT_DOTNET = "dotnet"
VALID_FORMAT = (FORMAT_PE, FORMAT_ELF, FORMAT_DOTNET)
# internal only, not to be used in rules
FORMAT_AUTO = "auto"
FORMAT_SC32 = "sc32"
FORMAT_SC64 = "sc64"
FORMAT_FREEZE = "freeze"
FORMAT_UNKNOWN = "unknown"
class Format(Feature):

View File

@@ -8,7 +8,8 @@ import pefile
import capa.features
import capa.features.extractors.elf
import capa.features.extractors.pefile
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, Arch, Format, String
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, FORMAT_FREEZE, Arch, Format, String
from capa.features.freeze import is_freeze
logger = logging.getLogger(__name__)
@@ -29,6 +30,8 @@ def extract_format(buf):
yield Format(FORMAT_PE), 0x0
elif buf.startswith(b"\x7fELF"):
yield Format(FORMAT_ELF), 0x0
elif is_freeze(buf):
yield Format(FORMAT_FREEZE), 0x0
else:
# we likely end up here:
# 1. handling a file format (e.g. macho)

View File

@@ -0,0 +1,105 @@
import logging
from typing import Tuple, Iterator
import dnfile
from capa.features.common import OS, OS_ANY, ARCH_ANY, ARCH_I386, ARCH_AMD64, FORMAT_DOTNET, Arch, Format, Feature
from capa.features.extractors.base_extractor import FeatureExtractor
logger = logging.getLogger(__name__)
def extract_file_format(**kwargs):
    """Yield the single file-format feature for .NET modules.

    Keyword arguments are accepted (and ignored) so that this handler can be
    invoked the same way as the other handlers, i.e. with ``pe=...``.

    yields:
      Tuple[Format, int]: the dotnet format feature at offset 0.
    """
    dotnet_format = Format(FORMAT_DOTNET)
    yield dotnet_format, 0x0
def extract_file_os(**kwargs):
    """Yield the OS feature for .NET modules, which is always ``OS_ANY``.

    Keyword arguments are accepted (and ignored) so that this handler can be
    invoked the same way as the other handlers, i.e. with ``pe=...``.

    yields:
      Tuple[OS, int]: the "any" OS feature at offset 0.
    """
    any_os = OS(OS_ANY)
    yield any_os, 0x0
def extract_file_arch(pe, **kwargs):
    """Yield the architecture feature for a .NET module.

    The decision combines the CLR header flag ``CLR_32BITREQUIRED`` with the PE
    optional header magic (``pe.PE_TYPE``). The flags alone are not sufficient:
    looking only at ``CLR_32BITREQUIRED``/``CLR_PREFER_32BIT`` classifies
    x86-only modules (required set, preferred clear) as "any" and
    architecture-neutral PE32 modules (both clear) as amd64.

    args:
      pe: the parsed module; must expose ``net.Flags`` and, as a pefile-style
        object, ``PE_TYPE``.
      **kwargs: ignored; accepted so all handlers share one calling convention.

    yields:
      Tuple[Arch, int]: exactly one architecture feature at offset 0.
    """
    # to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020
    # NOTE(review): .NET 4.5 added "any CPU, 32-bit preferred"; such modules set
    # CLR_32BITREQUIRED as well and are therefore reported as i386 here - confirm
    # this is the desired classification.

    # PE optional header magic values (PE32 and PE32+ respectively).
    optional_header_magic_pe = 0x10B
    optional_header_magic_pe_plus = 0x20B

    # fall back to "any" when the PE type is unavailable rather than guessing.
    pe_type = getattr(pe, "PE_TYPE", None)
    requires_32bit = bool(pe.net.Flags.CLR_32BITREQUIRED)

    if requires_32bit and pe_type == optional_header_magic_pe:
        yield Arch(ARCH_I386), 0x0
    elif not requires_32bit and pe_type == optional_header_magic_pe_plus:
        yield Arch(ARCH_AMD64), 0x0
    else:
        yield Arch(ARCH_ANY), 0x0
def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
    """Run every handler in ``FILE_HANDLERS`` against the module and yield their results.

    args:
      pe: the parsed .NET module, passed to each handler as keyword ``pe``.

    yields:
      Tuple[Feature, int]: each feature and its offset, in handler order.
    """
    for extract in FILE_HANDLERS:
        yield from extract(pe=pe)  # type: ignore
# handlers invoked by `extract_file_features`, in order; each is called with `pe=<module>`.
# the commented-out names are not defined in this module and are currently disabled.
FILE_HANDLERS = (
# extract_file_export_names,
# extract_file_import_names,
# extract_file_section_names,
# extract_file_strings,
# extract_file_function_names,
extract_file_format,
)
def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
    """Run every handler in ``GLOBAL_HANDLERS`` against the module and yield their results.

    args:
      pe: the parsed .NET module, passed to each handler as keyword ``pe``.

    yields:
      Tuple[Feature, int]: each feature and its offset, in handler order.
    """
    for extract in GLOBAL_HANDLERS:
        yield from extract(pe=pe)  # type: ignore
# handlers invoked by `extract_global_features`, in order; each is called with `pe=<module>`.
GLOBAL_HANDLERS = (
extract_file_os,
extract_file_arch,
)
class DnfileFeatureExtractor(FeatureExtractor):
    """File-scope feature extractor for .NET modules, backed by dnfile.

    Only global and file features are provided; every function, basic block,
    and instruction level method raises NotImplementedError.
    """

    def __init__(self, path: str):
        """Parse the file at `path` with dnfile and keep the result on `self.pe`."""
        super().__init__()
        self.path: str = path
        self.pe: dnfile.dnPE = dnfile.dnPE(path)

    def get_base_address(self) -> int:
        """Return the `EntryPointTokenOrRva` field of the module's CLR header."""
        clr_header = self.pe.net.struct
        return clr_header.EntryPointTokenOrRva

    def extract_global_features(self):
        """Yield the global (OS, arch) features of the parsed module."""
        yield from extract_global_features(self.pe)

    def extract_file_features(self):
        """Yield the file-scope features of the parsed module."""
        yield from extract_file_features(self.pe)

    def is_dotnet_file(self) -> bool:
        """Return True when dnfile exposes a truthy `net` attribute for the file."""
        return bool(self.pe.net)

    def get_runtime_version(self) -> Tuple[int, int]:
        """Return the (major, minor) runtime version recorded in the CLR header."""
        clr_header = self.pe.net.struct
        return clr_header.MajorRuntimeVersion, clr_header.MinorRuntimeVersion

    def get_meta_version_string(self) -> str:
        """Return the metadata version field decoded as UTF-8."""
        raw_version = self.pe.net.metadata.struct.Version
        return raw_version.decode("utf-8")

    # the remaining extractor interface is intentionally unsupported:
    # this extractor does no code analysis.

    def get_functions(self):
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def extract_function_features(self, f):
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def get_basic_blocks(self, f):
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def extract_basic_block_features(self, f, bb):
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def get_instructions(self, f, bb):
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def extract_insn_features(self, f, bb, insn):
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def is_library_function(self, va):
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

    def get_function_name(self, va):
        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")

View File

@@ -53,13 +53,12 @@ import zlib
import logging
from typing import Dict, Type
import capa.helpers
import capa.features.file
import capa.features.insn
import capa.features.common
import capa.features.basicblock
import capa.features.extractors.base_extractor
from capa.helpers import hex
from capa.features.common import Feature
logger = logging.getLogger(__name__)
@@ -87,6 +86,7 @@ def dumps(extractor):
returns:
str: the serialized features.
"""
hex = capa.helpers.hex
ret = {
"version": 1,
"base address": extractor.get_base_address(),

View File

@@ -5,10 +5,20 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import os
import logging
from typing import NoReturn
from capa.exceptions import UnsupportedFormatError
from capa.features.common import FORMAT_SC32, FORMAT_SC64, FORMAT_UNKNOWN
from capa.features.extractors.common import extract_format
EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32")
EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64")
logger = logging.getLogger("capa")
_hex = hex
@@ -35,3 +45,72 @@ def is_runtime_ida():
def assert_never(value: NoReturn) -> NoReturn:
assert False, f"Unhandled value: {value} ({type(value).__name__})"
def get_format_from_extension(sample: str) -> str:
    """Guess a shellcode format from the suffix of the sample path.

    args:
      sample: path (or name) of the sample.

    returns:
      FORMAT_SC32 or FORMAT_SC64 when the path ends with one of the known
      shellcode extensions, otherwise FORMAT_UNKNOWN.
    """
    # checked in order: 32-bit extensions first, then 64-bit.
    candidates = (
        (EXTENSIONS_SHELLCODE_32, FORMAT_SC32),
        (EXTENSIONS_SHELLCODE_64, FORMAT_SC64),
    )
    for extensions, shellcode_format in candidates:
        if sample.endswith(extensions):
            return shellcode_format
    return FORMAT_UNKNOWN
def get_auto_format(path: str) -> str:
    """Determine the sample format, first from file content, then from its extension.

    args:
      path: path to the sample on disk.

    returns:
      the detected format name.

    raises:
      UnsupportedFormatError: when neither content nor extension identify the format.
    """
    detected = get_format(path)
    if detected != FORMAT_UNKNOWN:
        return detected

    # content sniffing failed; shellcode can only be recognized by extension.
    detected = get_format_from_extension(path)
    if detected == FORMAT_UNKNOWN:
        raise UnsupportedFormatError()
    return detected
def get_format(sample: str) -> str:
    """Read the sample and return the value of the first format feature found in it.

    args:
      sample: path to the sample on disk.

    returns:
      the value of the first feature yielded by `extract_format`, or
      FORMAT_UNKNOWN when it yields nothing.
    """
    with open(sample, "rb") as f:
        contents = f.read()

    # only the first reported format matters; the offset is not needed.
    first = next(iter(extract_format(contents)), None)
    if first is None:
        return FORMAT_UNKNOWN

    feature, _ = first
    assert isinstance(feature.value, str)
    return feature.value
def log_unsupported_format_error():
    """Log an error banner explaining that the input is not a supported file format."""
    rule = "-" * 80
    lines = (
        rule,
        " Input file does not appear to be a PE or ELF file.",
        " ",
        " capa currently only supports analyzing PE and ELF files (or shellcode, when using --format sc32|sc64).",
        " If you don't know the input file type, you can try using the `file` utility to guess it.",
        rule,
    )
    for line in lines:
        logger.error(line)
def log_unsupported_os_error():
    """Log an error banner explaining that the input targets an unsupported OS."""
    rule = "-" * 80
    lines = (
        rule,
        " Input file does not appear to target a supported OS.",
        " ",
        " capa currently only supports analyzing executables for some operating systems (including Windows and Linux).",
        rule,
    )
    for line in lines:
        logger.error(line)
def log_unsupported_arch_error():
    """Log an error banner explaining that the input targets an unsupported architecture."""
    rule = "-" * 80
    lines = (
        rule,
        " Input file does not appear to target a supported architecture.",
        " ",
        " capa currently only supports analyzing x86 (32- and 64-bit).",
        rule,
    )
    for line in lines:
        logger.error(line)
def log_unsupported_runtime_error():
    """Log an error banner explaining that the Python runtime is unsupported."""
    rule = "-" * 80
    lines = (
        rule,
        " Unsupported runtime or Python interpreter.",
        " ",
        " capa supports running under Python 3.7 and higher.",
        " ",
        " If you're seeing this message on the command line, please ensure you're running a supported Python version.",
        rule,
    )
    for line in lines:
        logger.error(line)

View File

@@ -41,18 +41,35 @@ import capa.render.vverbose
import capa.features.extractors
import capa.features.extractors.common
import capa.features.extractors.pefile
import capa.features.extractors.dnfile_
import capa.features.extractors.elffile
from capa.rules import Rule, Scope, RuleSet
from capa.engine import FeatureSet, MatchResults
from capa.helpers import get_file_taste
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
from capa.helpers import (
get_format,
get_file_taste,
get_auto_format,
log_unsupported_os_error,
log_unsupported_arch_error,
log_unsupported_format_error,
)
from capa.exceptions import UnsupportedOSError, UnsupportedArchError, UnsupportedFormatError, UnsupportedRuntimeError
from capa.features.common import (
FORMAT_PE,
FORMAT_ELF,
FORMAT_AUTO,
FORMAT_SC32,
FORMAT_SC64,
FORMAT_DOTNET,
FORMAT_FREEZE,
)
from capa.features.extractors.base_extractor import FunctionHandle, FeatureExtractor
RULES_PATH_DEFAULT_STRING = "(embedded rules)"
SIGNATURES_PATH_DEFAULT_STRING = "(embedded signatures)"
BACKEND_VIV = "vivisect"
BACKEND_SMDA = "smda"
EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32")
EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64")
BACKEND_DOTNET = "dotnet"
E_MISSING_RULES = -10
E_MISSING_FILE = -11
@@ -287,6 +304,7 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro
return matches, meta
# TODO move all to helpers?
def has_rule_with_namespace(rules, capabilities, rule_cat):
for rule_name in capabilities.keys():
if rules.rules[rule_name].meta.get("namespace", "").startswith(rule_cat):
@@ -334,17 +352,6 @@ def is_supported_format(sample: str) -> bool:
return len(list(capa.features.extractors.common.extract_format(taste))) == 1
def get_format(sample: str) -> str:
with open(sample, "rb") as f:
buf = f.read()
for feature, _ in capa.features.extractors.common.extract_format(buf):
assert isinstance(feature.value, str)
return feature.value
return "unknown"
def is_supported_arch(sample: str) -> bool:
with open(sample, "rb") as f:
buf = f.read()
@@ -433,19 +440,7 @@ def get_default_signatures() -> List[str]:
return ret
class UnsupportedFormatError(ValueError):
pass
class UnsupportedArchError(ValueError):
pass
class UnsupportedOSError(ValueError):
pass
def get_workspace(path, format, sigpaths):
def get_workspace(path, format_, sigpaths):
"""
load the program at the given path into a vivisect workspace using the given format.
also apply the given FLIRT signatures.
@@ -465,21 +460,22 @@ def get_workspace(path, format, sigpaths):
import viv_utils
logger.debug("generating vivisect workspace for: %s", path)
if format == "auto":
# TODO should not be auto at this point, anymore
if format_ == FORMAT_AUTO:
if not is_supported_format(path):
raise UnsupportedFormatError()
# don't analyze, so that we can add our Flirt function analyzer first.
vw = viv_utils.getWorkspace(path, analyze=False, should_save=False)
elif format in {"pe", "elf"}:
elif format_ in {FORMAT_PE, FORMAT_ELF}:
vw = viv_utils.getWorkspace(path, analyze=False, should_save=False)
elif format == "sc32":
elif format_ == FORMAT_SC32:
# these are not analyzed nor saved.
vw = viv_utils.getShellcodeWorkspaceFromFile(path, arch="i386", analyze=False)
elif format == "sc64":
elif format_ == FORMAT_SC64:
vw = viv_utils.getShellcodeWorkspaceFromFile(path, arch="amd64", analyze=False)
else:
raise ValueError("unexpected format: " + format)
raise ValueError("unexpected format: " + format_)
viv_utils.flirt.register_flirt_signature_analyzers(vw, sigpaths)
@@ -489,12 +485,9 @@ def get_workspace(path, format, sigpaths):
return vw
class UnsupportedRuntimeError(RuntimeError):
pass
# TODO get_extractors -> List[FeatureExtractor]?
def get_extractor(
path: str, format: str, backend: str, sigpaths: List[str], should_save_workspace=False, disable_progress=False
path: str, format_: str, backend: str, sigpaths: List[str], should_save_workspace=False, disable_progress=False
) -> FeatureExtractor:
"""
raises:
@@ -502,7 +495,7 @@ def get_extractor(
UnsupportedArchError
UnsupportedOSError
"""
if format not in ("sc32", "sc64"):
if format_ not in (FORMAT_SC32, FORMAT_SC64):
if not is_supported_format(path):
raise UnsupportedFormatError()
@@ -512,6 +505,10 @@ def get_extractor(
if not is_supported_os(path):
raise UnsupportedOSError()
if format_ == FORMAT_DOTNET:
# TODO return capa.features.extractors.dotnet.extractor.DnFeatureExtractor(...)
raise NotImplementedError("DnFeatureExtractor")
if backend == "smda":
from smda.SmdaConfig import SmdaConfig
from smda.Disassembler import Disassembler
@@ -530,7 +527,7 @@ def get_extractor(
import capa.features.extractors.viv.extractor
with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
vw = get_workspace(path, format, sigpaths)
vw = get_workspace(path, format_, sigpaths)
if should_save_workspace:
logger.debug("saving workspace")
@@ -545,6 +542,22 @@ def get_extractor(
return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path)
def get_file_extractors(sample: str, format_: str) -> List[FeatureExtractor]:
    """Build the lightweight file-scope extractors that apply to the sample.

    args:
      sample: path to the sample on disk.
      format_: the already-resolved sample format.

    returns:
      for PE: the pefile extractor, followed by the dnfile extractor when the
      file is a .NET module; for ELF: the elffile extractor; otherwise an
      empty list.
    """
    extractors_pkg = capa.features.extractors
    found: List[FeatureExtractor] = []

    if format_ == extractors_pkg.common.FORMAT_PE:
        found.append(extractors_pkg.pefile.PefileFeatureExtractor(sample))

        # a PE may additionally be a .NET module; only then keep the dnfile extractor.
        candidate = extractors_pkg.dnfile_.DnfileFeatureExtractor(sample)
        if candidate.is_dotnet_file():
            found.append(candidate)

    elif format_ == extractors_pkg.common.FORMAT_ELF:
        found.append(extractors_pkg.elffile.ElfFeatureExtractor(sample))

    return found
def is_nursery_rule_path(path: str) -> bool:
"""
The nursery is a spot for rules that have not yet been fully polished.
@@ -652,7 +665,7 @@ def collect_metadata(argv, sample_path, rules_path, extractor):
if rules_path != RULES_PATH_DEFAULT_STRING:
rules_path = os.path.abspath(os.path.normpath(rules_path))
format = get_format(sample_path)
format_ = get_format(sample_path)
arch = get_arch(sample_path)
os_ = get_os(sample_path)
@@ -667,7 +680,7 @@ def collect_metadata(argv, sample_path, rules_path, extractor):
"path": os.path.normpath(sample_path),
},
"analysis": {
"format": format,
"format": format_,
"arch": arch,
"os": os_,
"extractor": extractor.__class__.__name__,
@@ -782,19 +795,20 @@ def install_common_args(parser, wanted=None):
if "format" in wanted:
formats = [
("auto", "(default) detect file type automatically"),
("pe", "Windows PE file"),
("elf", "Executable and Linkable Format"),
("sc32", "32-bit shellcode"),
("sc64", "64-bit shellcode"),
("freeze", "features previously frozen by capa"),
(FORMAT_AUTO, "(default) detect file type automatically"),
(FORMAT_PE, "Windows PE file"),
(FORMAT_DOTNET, ".NET PE file"),
(FORMAT_ELF, "Executable and Linkable Format"),
(FORMAT_SC32, "32-bit shellcode"),
(FORMAT_SC64, "64-bit shellcode"),
(FORMAT_FREEZE, "features previously frozen by capa"),
]
format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
parser.add_argument(
"-f",
"--format",
choices=[f[0] for f in formats],
default="auto",
default=FORMAT_AUTO,
help="select sample format, %s" % format_help,
)
@@ -963,13 +977,21 @@ def main(argv=None):
return ret
try:
taste = get_file_taste(args.sample)
_ = get_file_taste(args.sample)
except IOError as e:
# per our research there's not a programmatic way to render the IOError with non-ASCII filename unless we
# handle the IOError separately and reach into the args
logger.error("%s", e.args[0])
return E_MISSING_FILE
format_ = args.format
if format_ == FORMAT_AUTO:
try:
format_ = get_auto_format(args.sample)
except UnsupportedFormatError:
log_unsupported_format_error()
return E_INVALID_FILE_TYPE
try:
rules = get_rules(args.rules, disable_progress=args.quiet)
rules = capa.rules.RuleSet(rules)
@@ -991,26 +1013,23 @@ def main(argv=None):
logger.error("%s", str(e))
return E_INVALID_RULE
file_extractor = None
if args.format == "pe" or (args.format == "auto" and taste.startswith(b"MZ")):
# these pefile and elffile file feature extractors are pretty light weight: they don't do any code analysis.
# so we can fairly quickly determine if the given file has "pure" file-scope rules
# that indicate a limitation (like "file is packed based on section names")
# and avoid doing a full code analysis on difficult/impossible binaries.
try:
file_extractor = capa.features.extractors.pefile.PefileFeatureExtractor(args.sample)
except PEFormatError as e:
logger.error("Input file '%s' is not a valid PE file: %s", args.sample, str(e))
return E_CORRUPT_FILE
# file feature extractors are pretty lightweight: they don't do any code analysis.
# so we can fairly quickly determine if the given file has "pure" file-scope rules
# that indicate a limitation (like "file is packed based on section names")
# and avoid doing a full code analysis on difficult/impossible binaries.
#
# this pass can inspect multiple file extractors, e.g., dotnet and pe to identify
# various limitations
try:
file_extractors = get_file_extractors(args.sample, format_)
except PEFormatError as e:
logger.error("Input file '%s' is not a valid PE file: %s", args.sample, str(e))
return E_CORRUPT_FILE
except (ELFError, OverflowError) as e:
logger.error("Input file '%s' is not a valid ELF file: %s", args.sample, str(e))
return E_CORRUPT_FILE
elif args.format == "elf" or (args.format == "auto" and taste.startswith(b"\x7fELF")):
try:
file_extractor = capa.features.extractors.elffile.ElfFeatureExtractor(args.sample)
except (ELFError, OverflowError) as e:
logger.error("Input file '%s' is not a valid ELF file: %s", args.sample, str(e))
return E_CORRUPT_FILE
if file_extractor:
for file_extractor in file_extractors:
try:
pure_file_capabilities, _ = find_file_capabilities(rules, file_extractor, {})
except PEFormatError as e:
@@ -1029,58 +1048,37 @@ def main(argv=None):
logger.debug("file limitation short circuit, won't analyze fully.")
return E_FILE_LIMITATION
try:
if args.format == "pe" or (args.format == "auto" and taste.startswith(b"MZ")):
sig_paths = get_signatures(args.signatures)
else:
sig_paths = []
logger.debug("skipping library code matching: only have PE signatures")
except (IOError) as e:
logger.error("%s", str(e))
return E_INVALID_SIG
if isinstance(file_extractor, capa.features.extractors.dnfile_.DnfileFeatureExtractor):
format_ = FORMAT_DOTNET
if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)):
format = "freeze"
if format_ == FORMAT_FREEZE:
with open(args.sample, "rb") as f:
extractor = capa.features.freeze.load(f.read())
else:
format = args.format
if format == "auto" and args.sample.endswith(EXTENSIONS_SHELLCODE_32):
format = "sc32"
elif format == "auto" and args.sample.endswith(EXTENSIONS_SHELLCODE_64):
format = "sc64"
try:
if format_ == FORMAT_PE:
sig_paths = get_signatures(args.signatures)
else:
sig_paths = []
logger.debug("skipping library code matching: only have native PE signatures")
except IOError as e:
logger.error("%s", str(e))
return E_INVALID_SIG
should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
try:
extractor = get_extractor(
args.sample, format, args.backend, sig_paths, should_save_workspace, disable_progress=args.quiet
args.sample, format_, args.backend, sig_paths, should_save_workspace, disable_progress=args.quiet
)
except UnsupportedFormatError:
logger.error("-" * 80)
logger.error(" Input file does not appear to be a PE or ELF file.")
logger.error(" ")
logger.error(
" capa currently only supports analyzing PE and ELF files (or shellcode, when using --format sc32|sc64)."
)
logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
logger.error("-" * 80)
log_unsupported_format_error()
return E_INVALID_FILE_TYPE
except UnsupportedArchError:
logger.error("-" * 80)
logger.error(" Input file does not appear to target a supported architecture.")
logger.error(" ")
logger.error(" capa currently only supports analyzing x86 (32- and 64-bit).")
logger.error("-" * 80)
log_unsupported_arch_error()
return E_INVALID_FILE_ARCH
except UnsupportedOSError:
logger.error("-" * 80)
logger.error(" Input file does not appear to target a supported OS.")
logger.error(" ")
logger.error(
" capa currently only supports analyzing executables for some operating systems (including Windows and Linux)."
)
logger.error("-" * 80)
log_unsupported_os_error()
return E_INVALID_FILE_OS
meta = collect_metadata(argv, args.sample, args.rules, extractor)

View File

@@ -7,9 +7,9 @@
# See the License for the specific language governing permissions and limitations under the License.
import json
import capa.render.result_document
from capa.rules import RuleSet
from capa.engine import MatchResults
from capa.render.result_document import convert_capabilities_to_result_document
class CapaJsonObjectEncoder(json.JSONEncoder):
@@ -27,7 +27,7 @@ class CapaJsonObjectEncoder(json.JSONEncoder):
def render(meta, rules: RuleSet, capabilities: MatchResults) -> str:
return json.dumps(
capa.render.result_document.convert_capabilities_to_result_document(meta, rules, capabilities),
convert_capabilities_to_result_document(meta, rules, capabilities),
cls=CapaJsonObjectEncoder,
sort_keys=True,
)

View File

@@ -7,7 +7,6 @@
# See the License for the specific language governing permissions and limitations under the License.
import copy
import capa.rules
import capa.engine
import capa.render.utils
import capa.features.common

View File

@@ -41,6 +41,7 @@ import tqdm.contrib.logging
import capa.main
import capa.rules
import capa.engine
import capa.helpers
import capa.features.insn
import capa.features.common
from capa.rules import Rule, RuleSet
@@ -286,16 +287,16 @@ def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]:
logger.debug("found cached results: %s: %d capabilities", nice_path, len(ctx.capabilities_by_sample[path]))
return ctx.capabilities_by_sample[path]
if nice_path.endswith(capa.main.EXTENSIONS_SHELLCODE_32):
format = "sc32"
elif nice_path.endswith(capa.main.EXTENSIONS_SHELLCODE_64):
format = "sc64"
if nice_path.endswith(capa.helpers.EXTENSIONS_SHELLCODE_32):
format_ = "sc32"
elif nice_path.endswith(capa.helpers.EXTENSIONS_SHELLCODE_64):
format_ = "sc64"
else:
format = "auto"
format_ = "auto"
logger.debug("analyzing sample: %s", nice_path)
extractor = capa.main.get_extractor(
nice_path, format, capa.main.BACKEND_VIV, DEFAULT_SIGNATURES, False, disable_progress=True
nice_path, format_, capa.main.BACKEND_VIV, DEFAULT_SIGNATURES, False, disable_progress=True
)
capabilities, _ = capa.main.find_capabilities(ctx.rules, extractor, disable_progress=True)

View File

@@ -59,7 +59,9 @@ import colorama
import capa.main
import capa.rules
import capa.engine
import capa.helpers
import capa.features
import capa.exceptions
import capa.render.utils as rutils
import capa.features.freeze
import capa.render.result_document
@@ -162,25 +164,11 @@ def main(argv=None):
extractor = capa.main.get_extractor(
args.sample, args.format, args.backend, sig_paths, should_save_workspace
)
except capa.main.UnsupportedFormatError:
logger.error("-" * 80)
logger.error(" Input file does not appear to be a PE file.")
logger.error(" ")
logger.error(
" capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)."
)
logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
logger.error("-" * 80)
except capa.exceptions.UnsupportedFormatError:
capa.helpers.log_unsupported_format_error()
return -1
except capa.main.UnsupportedRuntimeError:
logger.error("-" * 80)
logger.error(" Unsupported runtime or Python interpreter.")
logger.error(" ")
logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.")
logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.")
logger.error(" ")
logger.error(" If you're seeing this message on the command line, please ensure you're running Python 2.7.")
logger.error("-" * 80)
except capa.exceptions.UnsupportedRuntimeError:
capa.helpers.log_unsupported_runtime_error()
return -1
meta = capa.main.collect_metadata(argv, args.sample, args.rules, extractor)

View File

@@ -75,8 +75,10 @@ import capa.rules
import capa.engine
import capa.helpers
import capa.features
import capa.exceptions
import capa.features.common
import capa.features.freeze
from capa.helpers import log_unsupported_runtime_error
logger = logging.getLogger("capa.show-features")
@@ -113,25 +115,11 @@ def main(argv=None):
extractor = capa.main.get_extractor(
args.sample, args.format, args.backend, sig_paths, should_save_workspace
)
except capa.main.UnsupportedFormatError:
logger.error("-" * 80)
logger.error(" Input file does not appear to be a PE file.")
logger.error(" ")
logger.error(
" capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)."
)
logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
logger.error("-" * 80)
except capa.exceptions.UnsupportedFormatError:
capa.helpers.log_unsupported_format_error()
return -1
except capa.main.UnsupportedRuntimeError:
logger.error("-" * 80)
logger.error(" Unsupported runtime or Python interpreter.")
logger.error(" ")
logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.")
logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.")
logger.error(" ")
logger.error(" If you're seeing this message on the command line, please ensure you're running Python 2.7.")
logger.error("-" * 80)
except capa.exceptions.UnsupportedRuntimeError:
log_unsupported_runtime_error()
return -1
if not args.function:

View File

@@ -26,6 +26,7 @@ requirements = [
"smda==1.7.1",
"pefile==2021.9.3",
"pyelftools==0.28",
"dnfile==0.10.0",
]
# this sets __version__

View File

@@ -224,6 +224,8 @@ def get_data_path_by_name(name):
return os.path.join(CD, "data", "79abd17391adc6251ecdc58d13d76baf.dll_")
elif name.startswith("946a9"):
return os.path.join(CD, "data", "946a99f36a46d335dec080d9a4371940.dll_")
elif name.startswith("b9f5b"):
return os.path.join(CD, "data", "b9f5bd514485fb06da39beff051b9fdc.exe_")
else:
raise ValueError("unexpected sample fixture: %s" % name)
@@ -276,7 +278,9 @@ def get_sample_md5_by_name(name):
elif name.startswith("79abd"):
return "79abd17391adc6251ecdc58d13d76baf"
elif name.startswith("946a9"):
return "946a99f36a46d335dec080d9a4371940.dll_"
return "946a99f36a46d335dec080d9a4371940"
elif name.startswith("b9f5b"):
return "b9f5bd514485fb06da39beff051b9fdc"
else:
raise ValueError("unexpected sample fixture: %s" % name)
@@ -583,6 +587,16 @@ FEATURE_PRESENCE_TESTS = sorted(
key=lambda t: (t[0], t[1]),
)
# feature presence expectations for the .NET sample, as (sample, scope, feature, expected).
# entries are ordered by (file, item) so that our LRU cache is most effective.
FEATURE_PRESENCE_TESTS_DOTNET = sorted(
    [
        ("b9f5b", "file", Arch(ARCH_I386), True),
        ("b9f5b", "file", Arch(ARCH_AMD64), False),
    ],
    key=lambda entry: entry[:2],
)
FEATURE_PRESENCE_TESTS_IDA = [
# file/imports
# IDA can recover more names of APIs imported by ordinal
@@ -695,3 +709,8 @@ def al_khaser_x86_extractor():
@pytest.fixture
def pingtaest_extractor():
return get_extractor(get_data_path_by_name("pingtaest"))
@pytest.fixture
def b9f5b_extractor():
    """Fixture: the extractor returned by `get_extractor` for the "b9f5b" sample."""
    sample_path = get_data_path_by_name("b9f5b")
    return get_extractor(sample_path)

View File

@@ -0,0 +1,25 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
# b9f5bd514485fb06da39beff051b9fdc
import pytest
import fixtures
from fixtures import *
from fixtures import parametrize
import capa.features.file
@parametrize(
    "sample,scope,feature,expected",
    fixtures.FEATURE_PRESENCE_TESTS_DOTNET,
    indirect=["sample", "scope"],
)
def test_dnfile_features(sample, scope, feature, expected):
    """Check presence/absence of each expected feature for the .NET sample."""
    # NOTE(review): this passes the pefile extractor factory, so the dnfile
    # extractor itself is presumably not exercised here - confirm whether a
    # dnfile-specific factory should be used instead.
    extractor_factory = fixtures.get_pefile_extractor
    fixtures.do_test_feature_presence(extractor_factory, sample, scope, feature, expected)