diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ddc8e2d0..7be4cdc9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -2,7 +2,7 @@ name: build on: push: - branches: [master] + branches: [master, dotnet-main] release: types: [edited, published] diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1f3cc7f7..34eda0e0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -2,9 +2,9 @@ name: CI on: push: - branches: [ master ] + branches: [ master, dotnet-main ] pull_request: - branches: [ master ] + branches: [ master, dotnet-main ] # save workspaces to speed up testing env: diff --git a/CHANGELOG.md b/CHANGELOG.md index 474b80be..84b6b3d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,14 +4,15 @@ ### New Features - - add new scope "instruction" for matching mnemonics and operands #767 @williballenthin - - add new feature "operand[{0, 1, 2}].number" for matching instruction operand immediate values #767 @williballenthin - - add new feature "operand[{0, 1, 2}].offset" for matching instruction operand offsets #767 @williballenthin +- add new scope "instruction" for matching mnemonics and operands #767 @williballenthin +- add new feature "operand[{0, 1, 2}].number" for matching instruction operand immediate values #767 @williballenthin +- add new feature "operand[{0, 1, 2}].offset" for matching instruction operand offsets #767 @williballenthin +- main: detect dotnet binaries #955 @mr-tz ### Breaking Changes - - instruction scope and operand feature are new and are not backwards compatible with older versions of capa - - Python 3.7 is now the minimum supported Python version #866 @williballenthin +- instruction scope and operand feature are new and are not backwards compatible with older versions of capa +- Python 3.7 is now the minimum supported Python version #866 @williballenthin ### New Rules (4) diff --git a/capa/exceptions.py b/capa/exceptions.py new file mode 100644 index 00000000..8c939997 --- /dev/null +++ b/capa/exceptions.py @@ -0,0 +1,14 @@ +class UnsupportedRuntimeError(RuntimeError): + pass + + +class UnsupportedFormatError(ValueError): + pass + + +class UnsupportedArchError(ValueError): + pass + + +class UnsupportedOSError(ValueError): + pass diff --git a/capa/features/common.py b/capa/features/common.py index 7f25e8d4..bff1138c 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -410,7 +410,9 @@ VALID_BITNESS = (BITNESS_X32, BITNESS_X64) # other candidates here: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types ARCH_I386 = "i386" ARCH_AMD64 = "amd64" -VALID_ARCH = (ARCH_I386, ARCH_AMD64) +# dotnet +ARCH_ANY = "any" +VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY) class Arch(Feature): @@ -422,8 +424,10 @@ class Arch(Feature): OS_WINDOWS = "windows" OS_LINUX = "linux" OS_MACOS = "macos" +# dotnet +OS_ANY = "any" VALID_OS = {os.value for os in capa.features.extractors.elf.OS} -VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS}) +VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS, OS_ANY}) class OS(Feature): @@ -434,7 +438,14 @@ class OS(Feature): FORMAT_PE = "pe" FORMAT_ELF = "elf" -VALID_FORMAT = (FORMAT_PE, FORMAT_ELF) +FORMAT_DOTNET = "dotnet" +VALID_FORMAT = (FORMAT_PE, FORMAT_ELF, FORMAT_DOTNET) +# internal only, not to be used in rules +FORMAT_AUTO = "auto" +FORMAT_SC32 = "sc32" +FORMAT_SC64 = "sc64" +FORMAT_FREEZE = "freeze" +FORMAT_UNKNOWN = "unknown" class Format(Feature): diff --git a/capa/features/extractors/common.py b/capa/features/extractors/common.py index 99f0ea08..786e4faf 100644 --- a/capa/features/extractors/common.py +++ b/capa/features/extractors/common.py @@ -8,7 +8,8 @@ import pefile import capa.features import capa.features.extractors.elf import capa.features.extractors.pefile -from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, Arch, Format, String +from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, FORMAT_FREEZE, Arch, Format, String +from capa.features.freeze import is_freeze logger = logging.getLogger(__name__) @@ -29,6 +30,8 @@ def extract_format(buf): yield Format(FORMAT_PE), 0x0 elif buf.startswith(b"\x7fELF"): yield Format(FORMAT_ELF), 0x0 + elif is_freeze(buf): + yield Format(FORMAT_FREEZE), 0x0 else: # we likely end up here: # 1. handling a file format (e.g. macho) diff --git a/capa/features/extractors/dnfile_.py b/capa/features/extractors/dnfile_.py new file mode 100644 index 00000000..c20fd32b --- /dev/null +++ b/capa/features/extractors/dnfile_.py @@ -0,0 +1,105 @@ +import logging +from typing import Tuple, Iterator + +import dnfile + +from capa.features.common import OS, OS_ANY, ARCH_ANY, ARCH_I386, ARCH_AMD64, FORMAT_DOTNET, Arch, Format, Feature +from capa.features.extractors.base_extractor import FeatureExtractor + +logger = logging.getLogger(__name__) + + +def extract_file_format(**kwargs): + yield Format(FORMAT_DOTNET), 0x0 + + +def extract_file_os(**kwargs): + yield OS(OS_ANY), 0x0 + + +def extract_file_arch(pe, **kwargs): + # TODO differences for versions < 4.5? + # via https://stackoverflow.com/a/23614024/10548020 + if pe.net.Flags.CLR_32BITREQUIRED and pe.net.Flags.CLR_PREFER_32BIT: + yield Arch(ARCH_I386), 0x0 + elif not pe.net.Flags.CLR_32BITREQUIRED and not pe.net.Flags.CLR_PREFER_32BIT: + yield Arch(ARCH_AMD64), 0x0 + else: + yield Arch(ARCH_ANY), 0x0 + + +def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: + for file_handler in FILE_HANDLERS: + for feature, va in file_handler(pe=pe): # type: ignore + yield feature, va + + +FILE_HANDLERS = ( + # extract_file_export_names, + # extract_file_import_names, + # extract_file_section_names, + # extract_file_strings, + # extract_file_function_names, + extract_file_format, +) + + +def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]: + for handler in GLOBAL_HANDLERS: + for feature, va in handler(pe=pe): # type: ignore + yield feature, va + + +GLOBAL_HANDLERS = ( + extract_file_os, + extract_file_arch, +) + + +class DnfileFeatureExtractor(FeatureExtractor): + def __init__(self, path: str): + super(DnfileFeatureExtractor, self).__init__() + self.path: str = path + self.pe: dnfile.dnPE = dnfile.dnPE(path) + + def get_base_address(self) -> int: + return self.pe.net.struct.EntryPointTokenOrRva + + def extract_global_features(self): + yield from extract_global_features(self.pe) + + def extract_file_features(self): + yield from extract_file_features(self.pe) + + def is_dotnet_file(self) -> bool: + return bool(self.pe.net) + + def get_runtime_version(self) -> Tuple[int, int]: + return self.pe.net.struct.MajorRuntimeVersion, self.pe.net.struct.MinorRuntimeVersion + + def get_meta_version_string(self) -> str: + return self.pe.net.metadata.struct.Version.decode("utf-8") + + def get_functions(self): + raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + + def extract_function_features(self, f): + raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + + def get_basic_blocks(self, f): + raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + + def extract_basic_block_features(self, f, bb): + raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + + def get_instructions(self, f, bb): + raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + + def extract_insn_features(self, f, bb, insn): + raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + + def is_library_function(self, va): + raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") + + def get_function_name(self, va): + raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features") diff --git a/capa/features/freeze.py b/capa/features/freeze.py index bfa92460..c86d9165 100644 --- a/capa/features/freeze.py +++ b/capa/features/freeze.py @@ -53,13 +53,12 @@ import zlib import logging from typing import Dict, Type +import capa.helpers import capa.features.file import capa.features.insn import capa.features.common import capa.features.basicblock import capa.features.extractors.base_extractor -from capa.helpers import hex -from capa.features.common import Feature logger = logging.getLogger(__name__) @@ -87,6 +86,7 @@ def dumps(extractor): returns: str: the serialized features. """ + hex = capa.helpers.hex ret = { "version": 1, "base address": extractor.get_base_address(), diff --git a/capa/helpers.py b/capa/helpers.py index e36ca3ac..5c0bcfd6 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -5,10 +5,20 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. - import os +import logging from typing import NoReturn +from capa.exceptions import UnsupportedFormatError +from capa.features.common import FORMAT_SC32, FORMAT_SC64, FORMAT_UNKNOWN +from capa.features.extractors.common import extract_format + +EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32") +EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64") + + +logger = logging.getLogger("capa") + _hex = hex @@ -35,3 +45,72 @@ def is_runtime_ida(): def assert_never(value: NoReturn) -> NoReturn: assert False, f"Unhandled value: {value} ({type(value).__name__})" + + +def get_format_from_extension(sample: str) -> str: + if sample.endswith(EXTENSIONS_SHELLCODE_32): + return FORMAT_SC32 + elif sample.endswith(EXTENSIONS_SHELLCODE_64): + return FORMAT_SC64 + return FORMAT_UNKNOWN + + +def get_auto_format(path: str) -> str: + format_ = get_format(path) + if format_ == FORMAT_UNKNOWN: + format_ = get_format_from_extension(path) + if format_ == FORMAT_UNKNOWN: + raise UnsupportedFormatError() + return format_ + + +def get_format(sample: str) -> str: + with open(sample, "rb") as f: + buf = f.read() + + for feature, _ in extract_format(buf): + assert isinstance(feature.value, str) + return feature.value + + return FORMAT_UNKNOWN + + +def log_unsupported_format_error(): + logger.error("-" * 80) + logger.error(" Input file does not appear to be a PE or ELF file.") + logger.error(" ") + logger.error( + " capa currently only supports analyzing PE and ELF files (or shellcode, when using --format sc32|sc64)." + ) + logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.") + logger.error("-" * 80) + + +def log_unsupported_os_error(): + logger.error("-" * 80) + logger.error(" Input file does not appear to target a supported OS.") + logger.error(" ") + logger.error( + " capa currently only supports analyzing executables for some operating systems (including Windows and Linux)." + ) + logger.error("-" * 80) + + +def log_unsupported_arch_error(): + logger.error("-" * 80) + logger.error(" Input file does not appear to target a supported architecture.") + logger.error(" ") + logger.error(" capa currently only supports analyzing x86 (32- and 64-bit).") + logger.error("-" * 80) + + +def log_unsupported_runtime_error(): + logger.error("-" * 80) + logger.error(" Unsupported runtime or Python interpreter.") + logger.error(" ") + logger.error(" capa supports running under Python 3.7 and higher.") + logger.error(" ") + logger.error( + " If you're seeing this message on the command line, please ensure you're running a supported Python version." + ) + logger.error("-" * 80) diff --git a/capa/main.py b/capa/main.py index 007cb241..eee2c294 100644 --- a/capa/main.py +++ b/capa/main.py @@ -41,18 +41,35 @@ import capa.render.vverbose import capa.features.extractors import capa.features.extractors.common import capa.features.extractors.pefile +import capa.features.extractors.dnfile_ import capa.features.extractors.elffile from capa.rules import Rule, Scope, RuleSet from capa.engine import FeatureSet, MatchResults -from capa.helpers import get_file_taste -from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor +from capa.helpers import ( + get_format, + get_file_taste, + get_auto_format, + log_unsupported_os_error, + log_unsupported_arch_error, + log_unsupported_format_error, +) +from capa.exceptions import UnsupportedOSError, UnsupportedArchError, UnsupportedFormatError, UnsupportedRuntimeError +from capa.features.common import ( + FORMAT_PE, + FORMAT_ELF, + FORMAT_AUTO, + FORMAT_SC32, + FORMAT_SC64, + FORMAT_DOTNET, + FORMAT_FREEZE, +) +from capa.features.extractors.base_extractor import FunctionHandle, FeatureExtractor RULES_PATH_DEFAULT_STRING = "(embedded rules)" SIGNATURES_PATH_DEFAULT_STRING = "(embedded signatures)" BACKEND_VIV = "vivisect" BACKEND_SMDA = "smda" -EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32") -EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64") +BACKEND_DOTNET = "dotnet" E_MISSING_RULES = -10 E_MISSING_FILE = -11 @@ -287,6 +304,7 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro return matches, meta +# TODO move all to helpers? def has_rule_with_namespace(rules, capabilities, rule_cat): for rule_name in capabilities.keys(): if rules.rules[rule_name].meta.get("namespace", "").startswith(rule_cat): @@ -334,17 +352,6 @@ def is_supported_format(sample: str) -> bool: return len(list(capa.features.extractors.common.extract_format(taste))) == 1 -def get_format(sample: str) -> str: - with open(sample, "rb") as f: - buf = f.read() - - for feature, _ in capa.features.extractors.common.extract_format(buf): - assert isinstance(feature.value, str) - return feature.value - - return "unknown" - - def is_supported_arch(sample: str) -> bool: with open(sample, "rb") as f: buf = f.read() @@ -433,19 +440,7 @@ def get_default_signatures() -> List[str]: return ret -class UnsupportedFormatError(ValueError): - pass - - -class UnsupportedArchError(ValueError): - pass - - -class UnsupportedOSError(ValueError): - pass - - -def get_workspace(path, format, sigpaths): +def get_workspace(path, format_, sigpaths): """ load the program at the given path into a vivisect workspace using the given format. also apply the given FLIRT signatures. @@ -465,21 +460,22 @@ def get_workspace(path, format, sigpaths): import viv_utils logger.debug("generating vivisect workspace for: %s", path) - if format == "auto": + # TODO should not be auto at this point, anymore + if format_ == FORMAT_AUTO: if not is_supported_format(path): raise UnsupportedFormatError() # don't analyze, so that we can add our Flirt function analyzer first. vw = viv_utils.getWorkspace(path, analyze=False, should_save=False) - elif format in {"pe", "elf"}: + elif format_ in {FORMAT_PE, FORMAT_ELF}: vw = viv_utils.getWorkspace(path, analyze=False, should_save=False) - elif format == "sc32": + elif format_ == FORMAT_SC32: # these are not analyzed nor saved. vw = viv_utils.getShellcodeWorkspaceFromFile(path, arch="i386", analyze=False) - elif format == "sc64": + elif format_ == FORMAT_SC64: vw = viv_utils.getShellcodeWorkspaceFromFile(path, arch="amd64", analyze=False) else: - raise ValueError("unexpected format: " + format) + raise ValueError("unexpected format: " + format_) viv_utils.flirt.register_flirt_signature_analyzers(vw, sigpaths) @@ -489,12 +485,9 @@ def get_workspace(path, format, sigpaths): return vw -class UnsupportedRuntimeError(RuntimeError): - pass - - +# TODO get_extractors -> List[FeatureExtractor]? def get_extractor( - path: str, format: str, backend: str, sigpaths: List[str], should_save_workspace=False, disable_progress=False + path: str, format_: str, backend: str, sigpaths: List[str], should_save_workspace=False, disable_progress=False ) -> FeatureExtractor: """ raises: @@ -502,7 +495,7 @@ def get_extractor( UnsupportedArchError UnsupportedOSError """ - if format not in ("sc32", "sc64"): + if format_ not in (FORMAT_SC32, FORMAT_SC64): if not is_supported_format(path): raise UnsupportedFormatError() @@ -512,6 +505,10 @@ def get_extractor( if not is_supported_os(path): raise UnsupportedOSError() + if format_ == FORMAT_DOTNET: + # TODO return capa.features.extractors.dotnet.extractor.DnFeatureExtractor(...) + raise NotImplementedError("DnFeatureExtractor") + if backend == "smda": from smda.SmdaConfig import SmdaConfig from smda.Disassembler import Disassembler @@ -530,7 +527,7 @@ def get_extractor( import capa.features.extractors.viv.extractor with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress): - vw = get_workspace(path, format, sigpaths) + vw = get_workspace(path, format_, sigpaths) if should_save_workspace: logger.debug("saving workspace") @@ -545,6 +542,22 @@ def get_extractor( return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path) +def get_file_extractors(sample: str, format_: str) -> List[FeatureExtractor]: + file_extractors: List[FeatureExtractor] = list() + + if format_ == capa.features.extractors.common.FORMAT_PE: + file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample)) + + dnfile_extractor = capa.features.extractors.dnfile_.DnfileFeatureExtractor(sample) + if dnfile_extractor.is_dotnet_file(): + file_extractors.append(dnfile_extractor) + + elif format_ == capa.features.extractors.common.FORMAT_ELF: + file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(sample)) + + return file_extractors + + def is_nursery_rule_path(path: str) -> bool: """ The nursery is a spot for rules that have not yet been fully polished. @@ -652,7 +665,7 @@ def collect_metadata(argv, sample_path, rules_path, extractor): if rules_path != RULES_PATH_DEFAULT_STRING: rules_path = os.path.abspath(os.path.normpath(rules_path)) - format = get_format(sample_path) + format_ = get_format(sample_path) arch = get_arch(sample_path) os_ = get_os(sample_path) @@ -667,7 +680,7 @@ def collect_metadata(argv, sample_path, rules_path, extractor): "path": os.path.normpath(sample_path), }, "analysis": { - "format": format, + "format": format_, "arch": arch, "os": os_, "extractor": extractor.__class__.__name__, @@ -782,19 +795,20 @@ def install_common_args(parser, wanted=None): if "format" in wanted: formats = [ - ("auto", "(default) detect file type automatically"), - ("pe", "Windows PE file"), - ("elf", "Executable and Linkable Format"), - ("sc32", "32-bit shellcode"), - ("sc64", "64-bit shellcode"), - ("freeze", "features previously frozen by capa"), + (FORMAT_AUTO, "(default) detect file type automatically"), + (FORMAT_PE, "Windows PE file"), + (FORMAT_DOTNET, ".NET PE file"), + (FORMAT_ELF, "Executable and Linkable Format"), + (FORMAT_SC32, "32-bit shellcode"), + (FORMAT_SC64, "64-bit shellcode"), + (FORMAT_FREEZE, "features previously frozen by capa"), ] format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats]) parser.add_argument( "-f", "--format", choices=[f[0] for f in formats], - default="auto", + default=FORMAT_AUTO, help="select sample format, %s" % format_help, ) @@ -963,13 +977,21 @@ def main(argv=None): return ret try: - taste = get_file_taste(args.sample) + _ = get_file_taste(args.sample) except IOError as e: # per our research there's not a programmatic way to render the IOError with non-ASCII filename unless we # handle the IOError separately and reach into the args logger.error("%s", e.args[0]) return E_MISSING_FILE + format_ = args.format + if format_ == FORMAT_AUTO: + try: + format_ = get_auto_format(args.sample) + except UnsupportedFormatError: + log_unsupported_format_error() + return E_INVALID_FILE_TYPE + try: rules = get_rules(args.rules, disable_progress=args.quiet) rules = capa.rules.RuleSet(rules) @@ -991,26 +1013,23 @@ def main(argv=None): logger.error("%s", str(e)) return E_INVALID_RULE - file_extractor = None - if args.format == "pe" or (args.format == "auto" and taste.startswith(b"MZ")): - # these pefile and elffile file feature extractors are pretty light weight: they don't do any code analysis. - # so we can fairly quickly determine if the given file has "pure" file-scope rules - # that indicate a limitation (like "file is packed based on section names") - # and avoid doing a full code analysis on difficult/impossible binaries. - try: - file_extractor = capa.features.extractors.pefile.PefileFeatureExtractor(args.sample) - except PEFormatError as e: - logger.error("Input file '%s' is not a valid PE file: %s", args.sample, str(e)) - return E_CORRUPT_FILE + # file feature extractors are pretty lightweight: they don't do any code analysis. + # so we can fairly quickly determine if the given file has "pure" file-scope rules + # that indicate a limitation (like "file is packed based on section names") + # and avoid doing a full code analysis on difficult/impossible binaries. + # + # this pass can inspect multiple file extractors, e.g., dotnet and pe to identify + # various limitations + try: + file_extractors = get_file_extractors(args.sample, format_) + except PEFormatError as e: + logger.error("Input file '%s' is not a valid PE file: %s", args.sample, str(e)) + return E_CORRUPT_FILE + except (ELFError, OverflowError) as e: + logger.error("Input file '%s' is not a valid ELF file: %s", args.sample, str(e)) + return E_CORRUPT_FILE - elif args.format == "elf" or (args.format == "auto" and taste.startswith(b"\x7fELF")): - try: - file_extractor = capa.features.extractors.elffile.ElfFeatureExtractor(args.sample) - except (ELFError, OverflowError) as e: - logger.error("Input file '%s' is not a valid ELF file: %s", args.sample, str(e)) - return E_CORRUPT_FILE - - if file_extractor: + for file_extractor in file_extractors: try: pure_file_capabilities, _ = find_file_capabilities(rules, file_extractor, {}) except PEFormatError as e: @@ -1029,58 +1048,37 @@ def main(argv=None): logger.debug("file limitation short circuit, won't analyze fully.") return E_FILE_LIMITATION - try: - if args.format == "pe" or (args.format == "auto" and taste.startswith(b"MZ")): - sig_paths = get_signatures(args.signatures) - else: - sig_paths = [] - logger.debug("skipping library code matching: only have PE signatures") - except (IOError) as e: - logger.error("%s", str(e)) - return E_INVALID_SIG + if isinstance(file_extractor, capa.features.extractors.dnfile_.DnfileFeatureExtractor): + format_ = FORMAT_DOTNET - if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)): - format = "freeze" + if format_ == FORMAT_FREEZE: with open(args.sample, "rb") as f: extractor = capa.features.freeze.load(f.read()) else: - format = args.format - if format == "auto" and args.sample.endswith(EXTENSIONS_SHELLCODE_32): - format = "sc32" - elif format == "auto" and args.sample.endswith(EXTENSIONS_SHELLCODE_64): - format = "sc64" + try: + if format_ == FORMAT_PE: + sig_paths = get_signatures(args.signatures) + else: + sig_paths = [] + logger.debug("skipping library code matching: only have native PE signatures") + except IOError as e: + logger.error("%s", str(e)) + return E_INVALID_SIG should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None) try: extractor = get_extractor( - args.sample, format, args.backend, sig_paths, should_save_workspace, disable_progress=args.quiet + args.sample, format_, args.backend, sig_paths, should_save_workspace, disable_progress=args.quiet ) except UnsupportedFormatError: - logger.error("-" * 80) - logger.error(" Input file does not appear to be a PE or ELF file.") - logger.error(" ") - logger.error( - " capa currently only supports analyzing PE and ELF files (or shellcode, when using --format sc32|sc64)." - ) - logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.") - logger.error("-" * 80) + log_unsupported_format_error() return E_INVALID_FILE_TYPE except UnsupportedArchError: - logger.error("-" * 80) - logger.error(" Input file does not appear to target a supported architecture.") - logger.error(" ") - logger.error(" capa currently only supports analyzing x86 (32- and 64-bit).") - logger.error("-" * 80) + log_unsupported_arch_error() return E_INVALID_FILE_ARCH except UnsupportedOSError: - logger.error("-" * 80) - logger.error(" Input file does not appear to target a supported OS.") - logger.error(" ") - logger.error( - " capa currently only supports analyzing executables for some operating systems (including Windows and Linux)." - ) - logger.error("-" * 80) + log_unsupported_os_error() return E_INVALID_FILE_OS meta = collect_metadata(argv, args.sample, args.rules, extractor) diff --git a/capa/render/json.py b/capa/render/json.py index a70f9122..9f595d4a 100644 --- a/capa/render/json.py +++ b/capa/render/json.py @@ -7,9 +7,9 @@ # See the License for the specific language governing permissions and limitations under the License. import json -import capa.render.result_document from capa.rules import RuleSet from capa.engine import MatchResults +from capa.render.result_document import convert_capabilities_to_result_document class CapaJsonObjectEncoder(json.JSONEncoder): @@ -27,7 +27,7 @@ class CapaJsonObjectEncoder(json.JSONEncoder): def render(meta, rules: RuleSet, capabilities: MatchResults) -> str: return json.dumps( - capa.render.result_document.convert_capabilities_to_result_document(meta, rules, capabilities), + convert_capabilities_to_result_document(meta, rules, capabilities), cls=CapaJsonObjectEncoder, sort_keys=True, ) diff --git a/capa/render/result_document.py b/capa/render/result_document.py index 1a0bde69..33e083fa 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -7,7 +7,6 @@ # See the License for the specific language governing permissions and limitations under the License. import copy -import capa.rules import capa.engine import capa.render.utils import capa.features.common diff --git a/scripts/lint.py b/scripts/lint.py index f3f16164..f4ba39ed 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -41,6 +41,7 @@ import tqdm.contrib.logging import capa.main import capa.rules import capa.engine +import capa.helpers import capa.features.insn import capa.features.common from capa.rules import Rule, RuleSet @@ -286,16 +287,16 @@ def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]: logger.debug("found cached results: %s: %d capabilities", nice_path, len(ctx.capabilities_by_sample[path])) return ctx.capabilities_by_sample[path] - if nice_path.endswith(capa.main.EXTENSIONS_SHELLCODE_32): - format = "sc32" - elif nice_path.endswith(capa.main.EXTENSIONS_SHELLCODE_64): - format = "sc64" + if nice_path.endswith(capa.helpers.EXTENSIONS_SHELLCODE_32): + format_ = "sc32" + elif nice_path.endswith(capa.helpers.EXTENSIONS_SHELLCODE_64): + format_ = "sc64" else: - format = "auto" + format_ = "auto" logger.debug("analyzing sample: %s", nice_path) extractor = capa.main.get_extractor( - nice_path, format, capa.main.BACKEND_VIV, DEFAULT_SIGNATURES, False, disable_progress=True + nice_path, format_, capa.main.BACKEND_VIV, DEFAULT_SIGNATURES, False, disable_progress=True ) capabilities, _ = capa.main.find_capabilities(ctx.rules, extractor, disable_progress=True) diff --git a/scripts/show-capabilities-by-function.py b/scripts/show-capabilities-by-function.py index 4f5761b6..b2af9446 100644 --- a/scripts/show-capabilities-by-function.py +++ b/scripts/show-capabilities-by-function.py @@ -59,7 +59,9 @@ import colorama import capa.main import capa.rules import capa.engine +import capa.helpers import capa.features +import capa.exceptions import capa.render.utils as rutils import capa.features.freeze import capa.render.result_document @@ -162,25 +164,11 @@ def main(argv=None): extractor = capa.main.get_extractor( args.sample, args.format, args.backend, sig_paths, should_save_workspace ) - except capa.main.UnsupportedFormatError: - logger.error("-" * 80) - logger.error(" Input file does not appear to be a PE file.") - logger.error(" ") - logger.error( - " capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)." - ) - logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.") - logger.error("-" * 80) + except capa.exceptions.UnsupportedFormatError: + capa.helpers.log_unsupported_format_error() return -1 - except capa.main.UnsupportedRuntimeError: - logger.error("-" * 80) - logger.error(" Unsupported runtime or Python interpreter.") - logger.error(" ") - logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.") - logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.") - logger.error(" ") - logger.error(" If you're seeing this message on the command line, please ensure you're running Python 2.7.") - logger.error("-" * 80) + except capa.exceptions.UnsupportedRuntimeError: + capa.helpers.log_unsupported_runtime_error() return -1 meta = capa.main.collect_metadata(argv, args.sample, args.rules, extractor) diff --git a/scripts/show-features.py b/scripts/show-features.py index a4f7f3b2..a070f653 100644 --- a/scripts/show-features.py +++ b/scripts/show-features.py @@ -75,8 +75,10 @@ import capa.rules import capa.engine import capa.helpers import capa.features +import capa.exceptions import capa.features.common import capa.features.freeze +from capa.helpers import log_unsupported_runtime_error logger = logging.getLogger("capa.show-features") @@ -113,25 +115,11 @@ def main(argv=None): extractor = capa.main.get_extractor( args.sample, args.format, args.backend, sig_paths, should_save_workspace ) - except capa.main.UnsupportedFormatError: - logger.error("-" * 80) - logger.error(" Input file does not appear to be a PE file.") - logger.error(" ") - logger.error( - " capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)." - ) - logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.") - logger.error("-" * 80) + except capa.exceptions.UnsupportedFormatError: + capa.helpers.log_unsupported_format_error() return -1 - except capa.main.UnsupportedRuntimeError: - logger.error("-" * 80) - logger.error(" Unsupported runtime or Python interpreter.") - logger.error(" ") - logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.") - logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.") - logger.error(" ") - logger.error(" If you're seeing this message on the command line, please ensure you're running Python 2.7.") - logger.error("-" * 80) + except capa.exceptions.UnsupportedRuntimeError: + log_unsupported_runtime_error() return -1 if not args.function: diff --git a/setup.py b/setup.py index f49a07c3..f07ae6c9 100644 --- a/setup.py +++ b/setup.py @@ -26,6 +26,7 @@ requirements = [ "smda==1.7.1", "pefile==2021.9.3", "pyelftools==0.28", + "dnfile==0.10.0", ] # this sets __version__ diff --git a/tests/fixtures.py b/tests/fixtures.py index 8630bccf..b544304b 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -224,6 +224,8 @@ def get_data_path_by_name(name): return os.path.join(CD, "data", "79abd17391adc6251ecdc58d13d76baf.dll_") elif name.startswith("946a9"): return os.path.join(CD, "data", "946a99f36a46d335dec080d9a4371940.dll_") + elif name.startswith("b9f5b"): + return os.path.join(CD, "data", "b9f5bd514485fb06da39beff051b9fdc.exe_") else: raise ValueError("unexpected sample fixture: %s" % name) @@ -276,7 +278,9 @@ def get_sample_md5_by_name(name): elif name.startswith("79abd"): return "79abd17391adc6251ecdc58d13d76baf" elif name.startswith("946a9"): - return "946a99f36a46d335dec080d9a4371940.dll_" + return "946a99f36a46d335dec080d9a4371940" + elif name.startswith("b9f5b"): + return "b9f5bd514485fb06da39beff051b9fdc" else: raise ValueError("unexpected sample fixture: %s" % name) @@ -583,6 +587,16 @@ FEATURE_PRESENCE_TESTS = sorted( key=lambda t: (t[0], t[1]), ) +FEATURE_PRESENCE_TESTS_DOTNET = sorted( + [ + ("b9f5b", "file", Arch(ARCH_I386), True), + ("b9f5b", "file", Arch(ARCH_AMD64), False), + ], + # order tests by (file, item) + # so that our LRU cache is most effective. + key=lambda t: (t[0], t[1]), +) + FEATURE_PRESENCE_TESTS_IDA = [ # file/imports # IDA can recover more names of APIs imported by ordinal @@ -695,3 +709,8 @@ def al_khaser_x86_extractor(): @pytest.fixture def pingtaest_extractor(): return get_extractor(get_data_path_by_name("pingtaest")) + + +@pytest.fixture +def b9f5b_extractor(): + return get_extractor(get_data_path_by_name("b9f5b")) diff --git a/tests/test_dotnet_features.py b/tests/test_dotnet_features.py new file mode 100644 index 00000000..449d7b55 --- /dev/null +++ b/tests/test_dotnet_features.py @@ -0,0 +1,25 @@ +# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +# b9f5bd514485fb06da39beff051b9fdc + +import pytest +import fixtures +from fixtures import * +from fixtures import parametrize + +import capa.features.file + + +@parametrize( + "sample,scope,feature,expected", + fixtures.FEATURE_PRESENCE_TESTS_DOTNET, + indirect=["sample", "scope"], +) +def test_dnfile_features(sample, scope, feature, expected): + fixtures.do_test_feature_presence(fixtures.get_pefile_extractor, sample, scope, feature, expected)