feat: start dotnet detection (#955)

* feat: start dotnet detection
* Apply suggestions from code review

  Co-authored-by: Willi Ballenthin <willi.ballenthin@gmail.com>
* refactor: dn instead of dotnet
* refactor: format branches, extractor reorg
* refactor: format selection and dotnet detect
* feat: get format, arch, os
* refactor: log errors and exceptions
* ci: also test and build for dotnet-main dev
* fix: import path
* fix: circular dep
* fix: remove buf argument

  feat: get runtime meta data
* fix: log unsupported runtime error
* fix: type ignore

Co-authored-by: Willi Ballenthin <willi.ballenthin@gmail.com>
2025-12-12 15:49:46 -08:00 · 2022-04-06 11:24:05 +02:00
parent de312d87dc
commit b5be876e61
18 changed files with 399 additions and 167 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -2,7 +2,7 @@ name: build

 on:
  push:
-    branches: [master]
+    branches: [master, dotnet-main]
  release:
    types: [edited, published]

--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -2,9 +2,9 @@ name: CI

 on:
  push:
-    branches: [ master ]
+    branches: [ master, dotnet-main ]
  pull_request:
-    branches: [ master ]
+    branches: [ master, dotnet-main ]

 # save workspaces to speed up testing
 env:
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,14 +4,15 @@

 ### New Features

- - add new scope "instruction" for matching mnemonics and operands #767 @williballenthin
- - add new feature "operand[{0, 1, 2}].number" for matching instruction operand immediate values #767 @williballenthin
- - add new feature "operand[{0, 1, 2}].offset" for matching instruction operand offsets #767 @williballenthin
+- add new scope "instruction" for matching mnemonics and operands #767 @williballenthin
+- add new feature "operand[{0, 1, 2}].number" for matching instruction operand immediate values #767 @williballenthin
+- add new feature "operand[{0, 1, 2}].offset" for matching instruction operand offsets #767 @williballenthin
+- main: detect dotnet binaries #955 @mr-tz

 ### Breaking Changes

-  - instruction scope and operand feature are new and are not backwards compatible with older versions of capa
-  - Python 3.7 is now the minimum supported Python version #866 @williballenthin
+- instruction scope and operand feature are new and are not backwards compatible with older versions of capa
+- Python 3.7 is now the minimum supported Python version #866 @williballenthin

 ### New Rules (4)

--- a/capa/exceptions.py
+++ b/capa/exceptions.py
@@ -0,0 +1,14 @@
+class UnsupportedRuntimeError(RuntimeError):
+    pass
+
+
+class UnsupportedFormatError(ValueError):
+    pass
+
+
+class UnsupportedArchError(ValueError):
+    pass
+
+
+class UnsupportedOSError(ValueError):
+    pass
--- a/capa/features/common.py
+++ b/capa/features/common.py
@@ -410,7 +410,9 @@ VALID_BITNESS = (BITNESS_X32, BITNESS_X64)
 # other candidates here: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types
 ARCH_I386 = "i386"
 ARCH_AMD64 = "amd64"
-VALID_ARCH = (ARCH_I386, ARCH_AMD64)
+# dotnet
+ARCH_ANY = "any"
+VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY)


 class Arch(Feature):
@@ -422,8 +424,10 @@ class Arch(Feature):
 OS_WINDOWS = "windows"
 OS_LINUX = "linux"
 OS_MACOS = "macos"
+# dotnet
+OS_ANY = "any"
 VALID_OS = {os.value for os in capa.features.extractors.elf.OS}
-VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS})
+VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS, OS_ANY})


 class OS(Feature):
@@ -434,7 +438,14 @@ class OS(Feature):

 FORMAT_PE = "pe"
 FORMAT_ELF = "elf"
-VALID_FORMAT = (FORMAT_PE, FORMAT_ELF)
+FORMAT_DOTNET = "dotnet"
+VALID_FORMAT = (FORMAT_PE, FORMAT_ELF, FORMAT_DOTNET)
+# internal only, not to be used in rules
+FORMAT_AUTO = "auto"
+FORMAT_SC32 = "sc32"
+FORMAT_SC64 = "sc64"
+FORMAT_FREEZE = "freeze"
+FORMAT_UNKNOWN = "unknown"


 class Format(Feature):
--- a/capa/features/extractors/common.py
+++ b/capa/features/extractors/common.py
@@ -8,7 +8,8 @@ import pefile
 import capa.features
 import capa.features.extractors.elf
 import capa.features.extractors.pefile
-from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, Arch, Format, String
+from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, FORMAT_FREEZE, Arch, Format, String
+from capa.features.freeze import is_freeze

 logger = logging.getLogger(__name__)

@@ -29,6 +30,8 @@ def extract_format(buf):
        yield Format(FORMAT_PE), 0x0
    elif buf.startswith(b"\x7fELF"):
        yield Format(FORMAT_ELF), 0x0
+    elif is_freeze(buf):
+        yield Format(FORMAT_FREEZE), 0x0
    else:
        # we likely end up here:
        #  1. handling a file format (e.g. macho)
--- a/capa/features/extractors/dnfile_.py
+++ b/capa/features/extractors/dnfile_.py
@@ -0,0 +1,105 @@
+import logging
+from typing import Tuple, Iterator
+
+import dnfile
+
+from capa.features.common import OS, OS_ANY, ARCH_ANY, ARCH_I386, ARCH_AMD64, FORMAT_DOTNET, Arch, Format, Feature
+from capa.features.extractors.base_extractor import FeatureExtractor
+
+logger = logging.getLogger(__name__)
+
+
+def extract_file_format(**kwargs):
+    yield Format(FORMAT_DOTNET), 0x0
+
+
+def extract_file_os(**kwargs):
+    yield OS(OS_ANY), 0x0
+
+
+def extract_file_arch(pe, **kwargs):
+    # TODO differences for versions < 4.5?
+    # via https://stackoverflow.com/a/23614024/10548020
+    if pe.net.Flags.CLR_32BITREQUIRED and pe.net.Flags.CLR_PREFER_32BIT:
+        yield Arch(ARCH_I386), 0x0
+    elif not pe.net.Flags.CLR_32BITREQUIRED and not pe.net.Flags.CLR_PREFER_32BIT:
+        yield Arch(ARCH_AMD64), 0x0
+    else:
+        yield Arch(ARCH_ANY), 0x0
+
+
+def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
+    for file_handler in FILE_HANDLERS:
+        for feature, va in file_handler(pe=pe):  # type: ignore
+            yield feature, va
+
+
+FILE_HANDLERS = (
+    # extract_file_export_names,
+    # extract_file_import_names,
+    # extract_file_section_names,
+    # extract_file_strings,
+    # extract_file_function_names,
+    extract_file_format,
+)
+
+
+def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
+    for handler in GLOBAL_HANDLERS:
+        for feature, va in handler(pe=pe):  # type: ignore
+            yield feature, va
+
+
+GLOBAL_HANDLERS = (
+    extract_file_os,
+    extract_file_arch,
+)
+
+
+class DnfileFeatureExtractor(FeatureExtractor):
+    def __init__(self, path: str):
+        super(DnfileFeatureExtractor, self).__init__()
+        self.path: str = path
+        self.pe: dnfile.dnPE = dnfile.dnPE(path)
+
+    def get_base_address(self) -> int:
+        return self.pe.net.struct.EntryPointTokenOrRva
+
+    def extract_global_features(self):
+        yield from extract_global_features(self.pe)
+
+    def extract_file_features(self):
+        yield from extract_file_features(self.pe)
+
+    def is_dotnet_file(self) -> bool:
+        return bool(self.pe.net)
+
+    def get_runtime_version(self) -> Tuple[int, int]:
+        return self.pe.net.struct.MajorRuntimeVersion, self.pe.net.struct.MinorRuntimeVersion
+
+    def get_meta_version_string(self) -> str:
+        return self.pe.net.metadata.struct.Version.decode("utf-8")
+
+    def get_functions(self):
+        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")
+
+    def extract_function_features(self, f):
+        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")
+
+    def get_basic_blocks(self, f):
+        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")
+
+    def extract_basic_block_features(self, f, bb):
+        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")
+
+    def get_instructions(self, f, bb):
+        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")
+
+    def extract_insn_features(self, f, bb, insn):
+        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")
+
+    def is_library_function(self, va):
+        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")
+
+    def get_function_name(self, va):
+        raise NotImplementedError("DnfileFeatureExtractor can only be used to extract file features")
--- a/capa/features/freeze.py
+++ b/capa/features/freeze.py
@@ -53,13 +53,12 @@ import zlib
 import logging
 from typing import Dict, Type

+import capa.helpers
 import capa.features.file
 import capa.features.insn
 import capa.features.common
 import capa.features.basicblock
 import capa.features.extractors.base_extractor
-from capa.helpers import hex
-from capa.features.common import Feature

 logger = logging.getLogger(__name__)

@@ -87,6 +86,7 @@ def dumps(extractor):
    returns:
      str: the serialized features.
    """
+    hex = capa.helpers.hex
    ret = {
        "version": 1,
        "base address": extractor.get_base_address(),
--- a/capa/helpers.py
+++ b/capa/helpers.py
@@ -5,10 +5,20 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 #  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-
 import os
+import logging
 from typing import NoReturn

+from capa.exceptions import UnsupportedFormatError
+from capa.features.common import FORMAT_SC32, FORMAT_SC64, FORMAT_UNKNOWN
+from capa.features.extractors.common import extract_format
+
+EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32")
+EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64")
+
+
+logger = logging.getLogger("capa")
+
 _hex = hex


@@ -35,3 +45,72 @@ def is_runtime_ida():

 def assert_never(value: NoReturn) -> NoReturn:
    assert False, f"Unhandled value: {value} ({type(value).__name__})"
+
+
+def get_format_from_extension(sample: str) -> str:
+    if sample.endswith(EXTENSIONS_SHELLCODE_32):
+        return FORMAT_SC32
+    elif sample.endswith(EXTENSIONS_SHELLCODE_64):
+        return FORMAT_SC64
+    return FORMAT_UNKNOWN
+
+
+def get_auto_format(path: str) -> str:
+    format_ = get_format(path)
+    if format_ == FORMAT_UNKNOWN:
+        format_ = get_format_from_extension(path)
+    if format_ == FORMAT_UNKNOWN:
+        raise UnsupportedFormatError()
+    return format_
+
+
+def get_format(sample: str) -> str:
+    with open(sample, "rb") as f:
+        buf = f.read()
+
+    for feature, _ in extract_format(buf):
+        assert isinstance(feature.value, str)
+        return feature.value
+
+    return FORMAT_UNKNOWN
+
+
+def log_unsupported_format_error():
+    logger.error("-" * 80)
+    logger.error(" Input file does not appear to be a PE or ELF file.")
+    logger.error(" ")
+    logger.error(
+        " capa currently only supports analyzing PE and ELF files (or shellcode, when using --format sc32|sc64)."
+    )
+    logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
+    logger.error("-" * 80)
+
+
+def log_unsupported_os_error():
+    logger.error("-" * 80)
+    logger.error(" Input file does not appear to target a supported OS.")
+    logger.error(" ")
+    logger.error(
+        " capa currently only supports analyzing executables for some operating systems (including Windows and Linux)."
+    )
+    logger.error("-" * 80)
+
+
+def log_unsupported_arch_error():
+    logger.error("-" * 80)
+    logger.error(" Input file does not appear to target a supported architecture.")
+    logger.error(" ")
+    logger.error(" capa currently only supports analyzing x86 (32- and 64-bit).")
+    logger.error("-" * 80)
+
+
+def log_unsupported_runtime_error():
+    logger.error("-" * 80)
+    logger.error(" Unsupported runtime or Python interpreter.")
+    logger.error(" ")
+    logger.error(" capa supports running under Python 3.7 and higher.")
+    logger.error(" ")
+    logger.error(
+        " If you're seeing this message on the command line, please ensure you're running a supported Python version."
+    )
+    logger.error("-" * 80)
--- a/capa/main.py
+++ b/capa/main.py
@@ -41,18 +41,35 @@ import capa.render.vverbose
 import capa.features.extractors
 import capa.features.extractors.common
 import capa.features.extractors.pefile
+import capa.features.extractors.dnfile_
 import capa.features.extractors.elffile
 from capa.rules import Rule, Scope, RuleSet
 from capa.engine import FeatureSet, MatchResults
-from capa.helpers import get_file_taste
-from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
+from capa.helpers import (
+    get_format,
+    get_file_taste,
+    get_auto_format,
+    log_unsupported_os_error,
+    log_unsupported_arch_error,
+    log_unsupported_format_error,
+)
+from capa.exceptions import UnsupportedOSError, UnsupportedArchError, UnsupportedFormatError, UnsupportedRuntimeError
+from capa.features.common import (
+    FORMAT_PE,
+    FORMAT_ELF,
+    FORMAT_AUTO,
+    FORMAT_SC32,
+    FORMAT_SC64,
+    FORMAT_DOTNET,
+    FORMAT_FREEZE,
+)
+from capa.features.extractors.base_extractor import FunctionHandle, FeatureExtractor

 RULES_PATH_DEFAULT_STRING = "(embedded rules)"
 SIGNATURES_PATH_DEFAULT_STRING = "(embedded signatures)"
 BACKEND_VIV = "vivisect"
 BACKEND_SMDA = "smda"
-EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32")
-EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64")
+BACKEND_DOTNET = "dotnet"

 E_MISSING_RULES = -10
 E_MISSING_FILE = -11
@@ -287,6 +304,7 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro
    return matches, meta


+# TODO move all to helpers?
 def has_rule_with_namespace(rules, capabilities, rule_cat):
    for rule_name in capabilities.keys():
        if rules.rules[rule_name].meta.get("namespace", "").startswith(rule_cat):
@@ -334,17 +352,6 @@ def is_supported_format(sample: str) -> bool:
    return len(list(capa.features.extractors.common.extract_format(taste))) == 1


-def get_format(sample: str) -> str:
-    with open(sample, "rb") as f:
-        buf = f.read()
-
-    for feature, _ in capa.features.extractors.common.extract_format(buf):
-        assert isinstance(feature.value, str)
-        return feature.value
-
-    return "unknown"
-
-
 def is_supported_arch(sample: str) -> bool:
    with open(sample, "rb") as f:
        buf = f.read()
@@ -433,19 +440,7 @@ def get_default_signatures() -> List[str]:
    return ret


-class UnsupportedFormatError(ValueError):
-    pass
-
-
-class UnsupportedArchError(ValueError):
-    pass
-
-
-class UnsupportedOSError(ValueError):
-    pass
-
-
-def get_workspace(path, format, sigpaths):
+def get_workspace(path, format_, sigpaths):
    """
    load the program at the given path into a vivisect workspace using the given format.
    also apply the given FLIRT signatures.
@@ -465,21 +460,22 @@ def get_workspace(path, format, sigpaths):
    import viv_utils

    logger.debug("generating vivisect workspace for: %s", path)
-    if format == "auto":
+    # TODO: format_ should no longer be FORMAT_AUTO at this point
+    if format_ == FORMAT_AUTO:
        if not is_supported_format(path):
            raise UnsupportedFormatError()

        # don't analyze, so that we can add our Flirt function analyzer first.
        vw = viv_utils.getWorkspace(path, analyze=False, should_save=False)
-    elif format in {"pe", "elf"}:
+    elif format_ in {FORMAT_PE, FORMAT_ELF}:
        vw = viv_utils.getWorkspace(path, analyze=False, should_save=False)
-    elif format == "sc32":
+    elif format_ == FORMAT_SC32:
        # these are not analyzed nor saved.
        vw = viv_utils.getShellcodeWorkspaceFromFile(path, arch="i386", analyze=False)
-    elif format == "sc64":
+    elif format_ == FORMAT_SC64:
        vw = viv_utils.getShellcodeWorkspaceFromFile(path, arch="amd64", analyze=False)
    else:
-        raise ValueError("unexpected format: " + format)
+        raise ValueError("unexpected format: " + format_)

    viv_utils.flirt.register_flirt_signature_analyzers(vw, sigpaths)

@@ -489,12 +485,9 @@ def get_workspace(path, format, sigpaths):
    return vw


-class UnsupportedRuntimeError(RuntimeError):
-    pass
-
-
+# TODO get_extractors -> List[FeatureExtractor]?
 def get_extractor(
-    path: str, format: str, backend: str, sigpaths: List[str], should_save_workspace=False, disable_progress=False
+    path: str, format_: str, backend: str, sigpaths: List[str], should_save_workspace=False, disable_progress=False
 ) -> FeatureExtractor:
    """
    raises:
@@ -502,7 +495,7 @@ def get_extractor(
      UnsupportedArchError
      UnsupportedOSError
    """
-    if format not in ("sc32", "sc64"):
+    if format_ not in (FORMAT_SC32, FORMAT_SC64):
        if not is_supported_format(path):
            raise UnsupportedFormatError()

@@ -512,6 +505,10 @@ def get_extractor(
        if not is_supported_os(path):
            raise UnsupportedOSError()

+    if format_ == FORMAT_DOTNET:
+        # TODO return capa.features.extractors.dotnet.extractor.DnFeatureExtractor(...)
+        raise NotImplementedError("DnFeatureExtractor")
+
    if backend == "smda":
        from smda.SmdaConfig import SmdaConfig
        from smda.Disassembler import Disassembler
@@ -530,7 +527,7 @@ def get_extractor(
        import capa.features.extractors.viv.extractor

        with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
-            vw = get_workspace(path, format, sigpaths)
+            vw = get_workspace(path, format_, sigpaths)

            if should_save_workspace:
                logger.debug("saving workspace")
@@ -545,6 +542,22 @@ def get_extractor(
        return capa.features.extractors.viv.extractor.VivisectFeatureExtractor(vw, path)


+def get_file_extractors(sample: str, format_: str) -> List[FeatureExtractor]:
+    file_extractors: List[FeatureExtractor] = list()
+
+    if format_ == capa.features.extractors.common.FORMAT_PE:
+        file_extractors.append(capa.features.extractors.pefile.PefileFeatureExtractor(sample))
+
+        dnfile_extractor = capa.features.extractors.dnfile_.DnfileFeatureExtractor(sample)
+        if dnfile_extractor.is_dotnet_file():
+            file_extractors.append(dnfile_extractor)
+
+    elif format_ == capa.features.extractors.common.FORMAT_ELF:
+        file_extractors.append(capa.features.extractors.elffile.ElfFeatureExtractor(sample))
+
+    return file_extractors
+
+
 def is_nursery_rule_path(path: str) -> bool:
    """
    The nursery is a spot for rules that have not yet been fully polished.
@@ -652,7 +665,7 @@ def collect_metadata(argv, sample_path, rules_path, extractor):
    if rules_path != RULES_PATH_DEFAULT_STRING:
        rules_path = os.path.abspath(os.path.normpath(rules_path))

-    format = get_format(sample_path)
+    format_ = get_format(sample_path)
    arch = get_arch(sample_path)
    os_ = get_os(sample_path)

@@ -667,7 +680,7 @@ def collect_metadata(argv, sample_path, rules_path, extractor):
            "path": os.path.normpath(sample_path),
        },
        "analysis": {
-            "format": format,
+            "format": format_,
            "arch": arch,
            "os": os_,
            "extractor": extractor.__class__.__name__,
@@ -782,19 +795,20 @@ def install_common_args(parser, wanted=None):

    if "format" in wanted:
        formats = [
-            ("auto", "(default) detect file type automatically"),
-            ("pe", "Windows PE file"),
-            ("elf", "Executable and Linkable Format"),
-            ("sc32", "32-bit shellcode"),
-            ("sc64", "64-bit shellcode"),
-            ("freeze", "features previously frozen by capa"),
+            (FORMAT_AUTO, "(default) detect file type automatically"),
+            (FORMAT_PE, "Windows PE file"),
+            (FORMAT_DOTNET, ".NET PE file"),
+            (FORMAT_ELF, "Executable and Linkable Format"),
+            (FORMAT_SC32, "32-bit shellcode"),
+            (FORMAT_SC64, "64-bit shellcode"),
+            (FORMAT_FREEZE, "features previously frozen by capa"),
        ]
        format_help = ", ".join(["%s: %s" % (f[0], f[1]) for f in formats])
        parser.add_argument(
            "-f",
            "--format",
            choices=[f[0] for f in formats],
-            default="auto",
+            default=FORMAT_AUTO,
            help="select sample format, %s" % format_help,
        )

@@ -963,13 +977,21 @@ def main(argv=None):
        return ret

    try:
-        taste = get_file_taste(args.sample)
+        _ = get_file_taste(args.sample)
    except IOError as e:
        # per our research there's not a programmatic way to render the IOError with non-ASCII filename unless we
        # handle the IOError separately and reach into the args
        logger.error("%s", e.args[0])
        return E_MISSING_FILE

+    format_ = args.format
+    if format_ == FORMAT_AUTO:
+        try:
+            format_ = get_auto_format(args.sample)
+        except UnsupportedFormatError:
+            log_unsupported_format_error()
+            return E_INVALID_FILE_TYPE
+
    try:
        rules = get_rules(args.rules, disable_progress=args.quiet)
        rules = capa.rules.RuleSet(rules)
@@ -991,26 +1013,23 @@ def main(argv=None):
        logger.error("%s", str(e))
        return E_INVALID_RULE

-    file_extractor = None
-    if args.format == "pe" or (args.format == "auto" and taste.startswith(b"MZ")):
-        # these pefile and elffile file feature extractors are pretty light weight: they don't do any code analysis.
-        # so we can fairly quickly determine if the given file has "pure" file-scope rules
-        # that indicate a limitation (like "file is packed based on section names")
-        # and avoid doing a full code analysis on difficult/impossible binaries.
-        try:
-            file_extractor = capa.features.extractors.pefile.PefileFeatureExtractor(args.sample)
-        except PEFormatError as e:
-            logger.error("Input file '%s' is not a valid PE file: %s", args.sample, str(e))
-            return E_CORRUPT_FILE
+    # file feature extractors are pretty lightweight: they don't do any code analysis.
+    # so we can fairly quickly determine if the given file has "pure" file-scope rules
+    # that indicate a limitation (like "file is packed based on section names")
+    # and avoid doing a full code analysis on difficult/impossible binaries.
+    #
+    # this pass can inspect multiple file extractors, e.g., dotnet and pe to identify
+    # various limitations
+    try:
+        file_extractors = get_file_extractors(args.sample, format_)
+    except PEFormatError as e:
+        logger.error("Input file '%s' is not a valid PE file: %s", args.sample, str(e))
+        return E_CORRUPT_FILE
+    except (ELFError, OverflowError) as e:
+        logger.error("Input file '%s' is not a valid ELF file: %s", args.sample, str(e))
+        return E_CORRUPT_FILE

-    elif args.format == "elf" or (args.format == "auto" and taste.startswith(b"\x7fELF")):
-        try:
-            file_extractor = capa.features.extractors.elffile.ElfFeatureExtractor(args.sample)
-        except (ELFError, OverflowError) as e:
-            logger.error("Input file '%s' is not a valid ELF file: %s", args.sample, str(e))
-            return E_CORRUPT_FILE
-
-    if file_extractor:
+    for file_extractor in file_extractors:
        try:
            pure_file_capabilities, _ = find_file_capabilities(rules, file_extractor, {})
        except PEFormatError as e:
@@ -1029,58 +1048,37 @@ def main(argv=None):
                logger.debug("file limitation short circuit, won't analyze fully.")
                return E_FILE_LIMITATION

-    try:
-        if args.format == "pe" or (args.format == "auto" and taste.startswith(b"MZ")):
-            sig_paths = get_signatures(args.signatures)
-        else:
-            sig_paths = []
-            logger.debug("skipping library code matching: only have PE signatures")
-    except (IOError) as e:
-        logger.error("%s", str(e))
-        return E_INVALID_SIG
+        if isinstance(file_extractor, capa.features.extractors.dnfile_.DnfileFeatureExtractor):
+            format_ = FORMAT_DOTNET

-    if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)):
-        format = "freeze"
+    if format_ == FORMAT_FREEZE:
        with open(args.sample, "rb") as f:
            extractor = capa.features.freeze.load(f.read())
    else:
-        format = args.format
-        if format == "auto" and args.sample.endswith(EXTENSIONS_SHELLCODE_32):
-            format = "sc32"
-        elif format == "auto" and args.sample.endswith(EXTENSIONS_SHELLCODE_64):
-            format = "sc64"
+        try:
+            if format_ == FORMAT_PE:
+                sig_paths = get_signatures(args.signatures)
+            else:
+                sig_paths = []
+                logger.debug("skipping library code matching: only have native PE signatures")
+        except IOError as e:
+            logger.error("%s", str(e))
+            return E_INVALID_SIG

        should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)

        try:
            extractor = get_extractor(
-                args.sample, format, args.backend, sig_paths, should_save_workspace, disable_progress=args.quiet
+                args.sample, format_, args.backend, sig_paths, should_save_workspace, disable_progress=args.quiet
            )
        except UnsupportedFormatError:
-            logger.error("-" * 80)
-            logger.error(" Input file does not appear to be a PE or ELF file.")
-            logger.error(" ")
-            logger.error(
-                " capa currently only supports analyzing PE and ELF files (or shellcode, when using --format sc32|sc64)."
-            )
-            logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
-            logger.error("-" * 80)
+            log_unsupported_format_error()
            return E_INVALID_FILE_TYPE
        except UnsupportedArchError:
-            logger.error("-" * 80)
-            logger.error(" Input file does not appear to target a supported architecture.")
-            logger.error(" ")
-            logger.error(" capa currently only supports analyzing x86 (32- and 64-bit).")
-            logger.error("-" * 80)
+            log_unsupported_arch_error()
            return E_INVALID_FILE_ARCH
        except UnsupportedOSError:
-            logger.error("-" * 80)
-            logger.error(" Input file does not appear to target a supported OS.")
-            logger.error(" ")
-            logger.error(
-                " capa currently only supports analyzing executables for some operating systems (including Windows and Linux)."
-            )
-            logger.error("-" * 80)
+            log_unsupported_os_error()
            return E_INVALID_FILE_OS

    meta = collect_metadata(argv, args.sample, args.rules, extractor)
--- a/capa/render/json.py
+++ b/capa/render/json.py
@@ -7,9 +7,9 @@
 # See the License for the specific language governing permissions and limitations under the License.
 import json

-import capa.render.result_document
 from capa.rules import RuleSet
 from capa.engine import MatchResults
+from capa.render.result_document import convert_capabilities_to_result_document


 class CapaJsonObjectEncoder(json.JSONEncoder):
@@ -27,7 +27,7 @@ class CapaJsonObjectEncoder(json.JSONEncoder):

 def render(meta, rules: RuleSet, capabilities: MatchResults) -> str:
    return json.dumps(
-        capa.render.result_document.convert_capabilities_to_result_document(meta, rules, capabilities),
+        convert_capabilities_to_result_document(meta, rules, capabilities),
        cls=CapaJsonObjectEncoder,
        sort_keys=True,
    )
--- a/capa/render/result_document.py
+++ b/capa/render/result_document.py
@@ -7,7 +7,6 @@
 # See the License for the specific language governing permissions and limitations under the License.
 import copy

-import capa.rules
 import capa.engine
 import capa.render.utils
 import capa.features.common
--- a/scripts/lint.py
+++ b/scripts/lint.py
@@ -41,6 +41,7 @@ import tqdm.contrib.logging
 import capa.main
 import capa.rules
 import capa.engine
+import capa.helpers
 import capa.features.insn
 import capa.features.common
 from capa.rules import Rule, RuleSet
@@ -286,16 +287,16 @@ def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]:
        logger.debug("found cached results: %s: %d capabilities", nice_path, len(ctx.capabilities_by_sample[path]))
        return ctx.capabilities_by_sample[path]

-    if nice_path.endswith(capa.main.EXTENSIONS_SHELLCODE_32):
-        format = "sc32"
-    elif nice_path.endswith(capa.main.EXTENSIONS_SHELLCODE_64):
-        format = "sc64"
+    if nice_path.endswith(capa.helpers.EXTENSIONS_SHELLCODE_32):
+        format_ = "sc32"
+    elif nice_path.endswith(capa.helpers.EXTENSIONS_SHELLCODE_64):
+        format_ = "sc64"
    else:
-        format = "auto"
+        format_ = "auto"

    logger.debug("analyzing sample: %s", nice_path)
    extractor = capa.main.get_extractor(
-        nice_path, format, capa.main.BACKEND_VIV, DEFAULT_SIGNATURES, False, disable_progress=True
+        nice_path, format_, capa.main.BACKEND_VIV, DEFAULT_SIGNATURES, False, disable_progress=True
    )

    capabilities, _ = capa.main.find_capabilities(ctx.rules, extractor, disable_progress=True)
--- a/scripts/show-capabilities-by-function.py
+++ b/scripts/show-capabilities-by-function.py
@@ -59,7 +59,9 @@ import colorama
 import capa.main
 import capa.rules
 import capa.engine
+import capa.helpers
 import capa.features
+import capa.exceptions
 import capa.render.utils as rutils
 import capa.features.freeze
 import capa.render.result_document
@@ -162,25 +164,11 @@ def main(argv=None):
            extractor = capa.main.get_extractor(
                args.sample, args.format, args.backend, sig_paths, should_save_workspace
            )
-        except capa.main.UnsupportedFormatError:
-            logger.error("-" * 80)
-            logger.error(" Input file does not appear to be a PE file.")
-            logger.error(" ")
-            logger.error(
-                " capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)."
-            )
-            logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
-            logger.error("-" * 80)
+        except capa.exceptions.UnsupportedFormatError:
+            capa.helpers.log_unsupported_format_error()
            return -1
-        except capa.main.UnsupportedRuntimeError:
-            logger.error("-" * 80)
-            logger.error(" Unsupported runtime or Python interpreter.")
-            logger.error(" ")
-            logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.")
-            logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.")
-            logger.error(" ")
-            logger.error(" If you're seeing this message on the command line, please ensure you're running Python 2.7.")
-            logger.error("-" * 80)
+        except capa.exceptions.UnsupportedRuntimeError:
+            capa.helpers.log_unsupported_runtime_error()
            return -1

    meta = capa.main.collect_metadata(argv, args.sample, args.rules, extractor)
--- a/scripts/show-features.py
+++ b/scripts/show-features.py
@@ -75,8 +75,10 @@ import capa.rules
 import capa.engine
 import capa.helpers
 import capa.features
+import capa.exceptions
 import capa.features.common
 import capa.features.freeze
+from capa.helpers import log_unsupported_runtime_error

 logger = logging.getLogger("capa.show-features")

@@ -113,25 +115,11 @@ def main(argv=None):
            extractor = capa.main.get_extractor(
                args.sample, args.format, args.backend, sig_paths, should_save_workspace
            )
-        except capa.main.UnsupportedFormatError:
-            logger.error("-" * 80)
-            logger.error(" Input file does not appear to be a PE file.")
-            logger.error(" ")
-            logger.error(
-                " capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)."
-            )
-            logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
-            logger.error("-" * 80)
+        except capa.exceptions.UnsupportedFormatError:
+            capa.helpers.log_unsupported_format_error()
            return -1
-        except capa.main.UnsupportedRuntimeError:
-            logger.error("-" * 80)
-            logger.error(" Unsupported runtime or Python interpreter.")
-            logger.error(" ")
-            logger.error(" capa supports running under Python 2.7 using Vivisect for binary analysis.")
-            logger.error(" It can also run within IDA Pro, using either Python 2.7 or 3.5+.")
-            logger.error(" ")
-            logger.error(" If you're seeing this message on the command line, please ensure you're running Python 2.7.")
-            logger.error("-" * 80)
+        except capa.exceptions.UnsupportedRuntimeError:
+            log_unsupported_runtime_error()
            return -1

    if not args.function:
--- a/setup.py
+++ b/setup.py
@@ -26,6 +26,7 @@ requirements = [
    "smda==1.7.1",
    "pefile==2021.9.3",
    "pyelftools==0.28",
+    "dnfile==0.10.0",
 ]

 # this sets __version__
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -224,6 +224,8 @@ def get_data_path_by_name(name):
        return os.path.join(CD, "data", "79abd17391adc6251ecdc58d13d76baf.dll_")
    elif name.startswith("946a9"):
        return os.path.join(CD, "data", "946a99f36a46d335dec080d9a4371940.dll_")
+    elif name.startswith("b9f5b"):
+        return os.path.join(CD, "data", "b9f5bd514485fb06da39beff051b9fdc.exe_")
    else:
        raise ValueError("unexpected sample fixture: %s" % name)

@@ -276,7 +278,9 @@ def get_sample_md5_by_name(name):
    elif name.startswith("79abd"):
        return "79abd17391adc6251ecdc58d13d76baf"
    elif name.startswith("946a9"):
-        return "946a99f36a46d335dec080d9a4371940.dll_"
+        return "946a99f36a46d335dec080d9a4371940"
+    elif name.startswith("b9f5b"):
+        return "b9f5bd514485fb06da39beff051b9fdc"
    else:
        raise ValueError("unexpected sample fixture: %s" % name)

@@ -583,6 +587,16 @@ FEATURE_PRESENCE_TESTS = sorted(
    key=lambda t: (t[0], t[1]),
 )

+FEATURE_PRESENCE_TESTS_DOTNET = sorted(
+    [
+        ("b9f5b", "file", Arch(ARCH_I386), True),
+        ("b9f5b", "file", Arch(ARCH_AMD64), False),
+    ],
+    # order tests by (sample, scope), the first two fields of each tuple,
+    # so that our LRU cache is most effective.
+    key=lambda t: (t[0], t[1]),
+)
+
 FEATURE_PRESENCE_TESTS_IDA = [
    # file/imports
    # IDA can recover more names of APIs imported by ordinal
@@ -695,3 +709,8 @@ def al_khaser_x86_extractor():
@pytest.fixture
 def pingtaest_extractor():
    return get_extractor(get_data_path_by_name("pingtaest"))
+
+
+@pytest.fixture
+def b9f5b_extractor():
+    return get_extractor(get_data_path_by_name("b9f5b"))  # sample b9f5bd514485fb06da39beff051b9fdc.exe_
--- a/tests/test_dotnet_features.py
+++ b/tests/test_dotnet_features.py
@@ -0,0 +1,25 @@
+# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at: [package root]/LICENSE.txt
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+#  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and limitations under the License.
+
+# sample under test: b9f5bd514485fb06da39beff051b9fdc
+
+import pytest
+import fixtures
+from fixtures import *
+from fixtures import parametrize
+
+import capa.features.file
+
+
+@parametrize(
+    "sample,scope,feature,expected",
+    fixtures.FEATURE_PRESENCE_TESTS_DOTNET,
+    indirect=["sample", "scope"],  # presumably resolved via same-named fixtures (pytest `indirect`); confirm
+)
+def test_dnfile_features(sample, scope, feature, expected):  # NOTE(review): drives get_pefile_extractor; confirm
+    fixtures.do_test_feature_presence(fixtures.get_pefile_extractor, sample, scope, feature, expected)