Merge branch 'master' of github.com:mandiant/capa into feature-981

This commit is contained in:
Willi Ballenthin
2022-04-08 16:01:59 -06:00
20 changed files with 703 additions and 17 deletions

View File

@@ -74,3 +74,6 @@ ignore_missing_imports = True
[mypy-elftools.*]
ignore_missing_imports = True
[mypy-dncil.*]
ignore_missing_imports = True

View File

@@ -9,6 +9,8 @@
- add new feature "operand[{0, 1, 2}].offset" for matching instruction operand offsets #767 @williballenthin
- main: detect dotnet binaries #955 @mr-tz
- render: support Addresses that aren't simple integers, like .NET token+offset #981 @williballenthin
- extract additional offset/number features in certain circumstances #320 @williballenthin
- add detection and basic feature extraction for dotnet #987 @mr-tz, @mike-hunhoff, @williballenthin
### Breaking Changes

View File

@@ -12,6 +12,8 @@ import capa.features.extractors.pefile
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, FORMAT_FREEZE, Arch, Format, String, Feature
from capa.features.freeze import is_freeze
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, FORMAT_FREEZE, Arch, Format, String
from capa.features.freeze import is_freeze
logger = logging.getLogger(__name__)

View File

@@ -0,0 +1,70 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
from __future__ import annotations
from typing import TYPE_CHECKING, Any, List, Tuple
if TYPE_CHECKING:
from capa.features.common import Feature
import dnfile
import capa.features.extractors
import capa.features.extractors.dnfile.file
import capa.features.extractors.dnfile.insn
from capa.features.extractors.base_extractor import FeatureExtractor
from capa.features.extractors.dnfile.helpers import get_dotnet_managed_method_bodies
class DnfileFeatureExtractor(FeatureExtractor):
def __init__(self, path: str):
super(DnfileFeatureExtractor, self).__init__()
self.pe: dnfile.dnPE = dnfile.dnPE(path)
# pre-compute these because we'll yield them at *every* scope.
self.global_features: List[Tuple[Feature, int]] = []
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe))
self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe))
def get_base_address(self):
return 0x0
def extract_global_features(self):
yield from self.global_features
def extract_file_features(self):
yield from capa.features.extractors.dnfile.file.extract_features(self.pe)
def get_functions(self):
# data structure shared across functions yielded here.
# useful for caching analysis relevant across a single workspace.
ctx = {}
ctx["pe"] = self.pe
for f in get_dotnet_managed_method_bodies(self.pe):
setattr(f, "ctx", ctx)
yield f
def extract_function_features(self, f):
# TODO
yield from []
def get_basic_blocks(self, f):
# each dotnet method is considered 1 basic block
yield f
def extract_basic_block_features(self, f, bb):
# we don't support basic block features
yield from []
def get_instructions(self, f, bb):
yield from f.instructions
def extract_insn_features(self, f, bb, insn):
yield from capa.features.extractors.dnfile.insn.extract_features(f, bb, insn)

View File

@@ -0,0 +1,40 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
from __future__ import annotations
from typing import TYPE_CHECKING, Tuple, Iterator
if TYPE_CHECKING:
import dnfile
from capa.features.common import Feature, Format
from capa.features.file import Import
import capa.features.extractors
def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, int]]:
yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe)
def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, int]]:
yield from capa.features.extractors.dotnetfile.extract_file_format(pe=pe)
def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
for file_handler in FILE_HANDLERS:
for (feature, token) in file_handler(pe):
yield feature, token
FILE_HANDLERS = (
extract_file_import_names,
# TODO extract_file_strings,
# TODO extract_file_function_names,
extract_file_format,
)

View File

@@ -0,0 +1,169 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
from __future__ import annotations
import logging
from typing import Any, Tuple, Iterator, Optional
import dnfile
from dncil.cil.body import CilMethodBody
from dncil.cil.error import MethodBodyFormatError
from dncil.clr.token import Token, StringToken, InvalidToken
from dncil.cil.body.reader import CilMethodBodyReaderBase
logger = logging.getLogger(__name__)
# key indexes to dotnet metadata tables
DOTNET_META_TABLES_BY_INDEX = {table.value: table.name for table in dnfile.enums.MetadataTables}
class DnfileMethodBodyReader(CilMethodBodyReaderBase):
def __init__(self, pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow):
self.pe: dnfile.dnPE = pe
self.offset: int = self.pe.get_offset_from_rva(row.Rva)
def read(self, n: int) -> bytes:
data: bytes = self.pe.get_data(self.pe.get_rva_from_offset(self.offset), n)
self.offset += n
return data
def tell(self) -> int:
return self.offset
def seek(self, offset: int) -> int:
self.offset = offset
return self.offset
def calculate_dotnet_token_value(table: int, rid: int) -> int:
return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK)
def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Any:
"""map generic token to string or table row"""
if isinstance(token, StringToken):
user_string: Optional[str] = read_dotnet_user_string(pe, token)
if user_string is None:
return InvalidToken(token.value)
return user_string
table_name: str = DOTNET_META_TABLES_BY_INDEX.get(token.table, "")
if not table_name:
# table_index is not valid
return InvalidToken(token.value)
table: Any = getattr(pe.net.mdtables, table_name, None)
if table is None:
# table index is valid but table is not present
return InvalidToken(token.value)
try:
return table.rows[token.rid - 1]
except IndexError:
# table index is valid but row index is not valid
return InvalidToken(token.value)
def read_dotnet_method_body(pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow) -> Optional[CilMethodBody]:
"""read dotnet method body"""
try:
return CilMethodBody(DnfileMethodBodyReader(pe, row))
except MethodBodyFormatError as e:
logger.warn("failed to parse managed method body @ 0x%08x (%s)" % (row.Rva, e))
return None
def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str]:
"""read user string from #US stream"""
try:
user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid)
except UnicodeDecodeError as e:
logger.warn("failed to decode #US stream index 0x%08x (%s)" % (token.rid, e))
return None
if user_string is None:
return None
return user_string.value
def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
"""get managed imports from MemberRef table
see https://www.ntcore.com/files/dotnetformat.htm
10 - MemberRef Table
Each row represents an imported method
Class (index into the TypeRef, ModuleRef, MethodDef, TypeSpec or TypeDef tables)
Name (index into String heap)
01 - TypeRef Table
Each row represents an imported class, its namespace and the assembly which contains it
TypeName (index into String heap)
TypeNamespace (index into String heap)
"""
if not hasattr(pe.net.mdtables, "MemberRef"):
return
for (rid, row) in enumerate(pe.net.mdtables.MemberRef):
if not isinstance(row.Class.row, (dnfile.mdtable.TypeRefRow,)):
continue
token: int = calculate_dotnet_token_value(dnfile.enums.MetadataTables.MemberRef.value, rid + 1)
# like System.IO.File::OpenRead
imp: str = f"{row.Class.row.TypeNamespace}.{row.Class.row.TypeName}::{row.Name}"
yield token, imp
def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
"""get unmanaged imports from ImplMap table
see https://www.ntcore.com/files/dotnetformat.htm
28 - ImplMap Table
ImplMap table holds information about unmanaged methods that can be reached from managed code, using PInvoke dispatch
MemberForwarded (index into the Field or MethodDef table; more precisely, a MemberForwarded coded index)
ImportName (index into the String heap)
ImportScope (index into the ModuleRef table)
"""
if not hasattr(pe.net.mdtables, "ImplMap"):
return
for row in pe.net.mdtables.ImplMap:
dll: str = row.ImportScope.row.Name
symbol: str = row.ImportName
# ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the
# name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; so we calculate and map the MemberForwarded
# MethodDef table token to help us later record native import method calls made from CIL
token: int = calculate_dotnet_token_value(row.MemberForwarded.table.number, row.MemberForwarded.row_index)
# like Kernel32.dll
if dll and "." in dll:
dll = dll.split(".")[0]
# like kernel32.CreateFileA
imp: str = f"{dll}.{symbol}"
yield token, imp
def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody]:
"""get managed methods from MethodDef table"""
if not hasattr(pe.net.mdtables, "MethodDef"):
return
for row in pe.net.mdtables.MethodDef:
if not row.ImplFlags.miIL or any((row.Flags.mdAbstract, row.Flags.mdPinvokeImpl)):
# skip methods that do not have a method body
continue
body: Optional[CilMethodBody] = read_dotnet_method_body(pe, row)
if body is None:
continue
yield body

View File

@@ -0,0 +1,96 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
from __future__ import annotations
from typing import TYPE_CHECKING, Dict, Tuple, Iterator, Optional
from itertools import chain
if TYPE_CHECKING:
from dncil.cil.instruction import Instruction
from dncil.cil.body import CilMethodBody
from capa.features.common import Feature
from dncil.clr.token import StringToken
from dncil.cil.opcode import OpCodes
import capa.features.extractors.helpers
from capa.features.insn import API, Number
from capa.features.common import String
from capa.features.extractors.dnfile.helpers import (
read_dotnet_user_string,
get_dotnet_managed_imports,
get_dotnet_unmanaged_imports,
)
def get_imports(ctx: Dict) -> Dict:
if "imports_cache" not in ctx:
ctx["imports_cache"] = {
token: imp
for (token, imp) in chain(get_dotnet_managed_imports(ctx["pe"]), get_dotnet_unmanaged_imports(ctx["pe"]))
}
return ctx["imports_cache"]
def extract_insn_api_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[API, int]]:
"""parse instruction API features"""
if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli):
return
name: str = get_imports(f.ctx).get(insn.operand.value, "")
if not name:
return
if "::" in name:
# like System.IO.File::OpenRead
yield API(name), insn.offset
else:
# like kernel32.CreateFileA
dll, _, symbol = name.rpartition(".")
for name_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol):
yield API(name_variant), insn.offset
def extract_insn_number_features(
f: CilMethodBody, bb: CilMethodBody, insn: Instruction
) -> Iterator[Tuple[Number, int]]:
"""parse instruction number features"""
if insn.is_ldc():
yield Number(insn.get_ldc()), insn.offset
def extract_insn_string_features(
f: CilMethodBody, bb: CilMethodBody, insn: Instruction
) -> Iterator[Tuple[String, int]]:
"""parse instruction string features"""
if not insn.is_ldstr():
return
if not isinstance(insn.operand, StringToken):
return
user_string: Optional[str] = read_dotnet_user_string(f.ctx["pe"], insn.operand)
if user_string is None:
return
yield String(user_string), insn.offset
def extract_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[Feature, int]]:
"""extract instruction features"""
for inst_handler in INSTRUCTION_HANDLERS:
for (feature, offset) in inst_handler(f, bb, insn):
yield feature, offset
INSTRUCTION_HANDLERS = (
extract_insn_api_features,
extract_insn_number_features,
extract_insn_string_features,
)

View File

@@ -0,0 +1,129 @@
import logging
from typing import Tuple, Iterator
from itertools import chain
import dnfile
import pefile
import capa.features.extractors.helpers
from capa.features.file import Import
from capa.features.common import OS, OS_ANY, ARCH_ANY, ARCH_I386, ARCH_AMD64, FORMAT_DOTNET, Arch, Format, Feature
from capa.features.extractors.base_extractor import FeatureExtractor
from capa.features.extractors.dnfile.helpers import get_dotnet_managed_imports, get_dotnet_unmanaged_imports
logger = logging.getLogger(__name__)
def extract_file_format(**kwargs) -> Iterator[Tuple[Format, int]]:
yield Format(FORMAT_DOTNET), 0x0
def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, int]]:
for (token, imp) in chain(get_dotnet_managed_imports(pe), get_dotnet_unmanaged_imports(pe)):
if "::" in imp:
# like System.IO.File::OpenRead
yield Import(imp), token
else:
# like kernel32.CreateFileA
dll, _, symbol = imp.rpartition(".")
for symbol_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol):
yield Import(symbol_variant), token
def extract_file_os(**kwargs) -> Iterator[Tuple[OS, int]]:
yield OS(OS_ANY), 0x0
def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, int]]:
# to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020
# .NET 4.5 added option: any CPU, 32-bit preferred
if pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE:
yield Arch(ARCH_I386), 0x0
elif not pe.net.Flags.CLR_32BITREQUIRED and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE_PLUS:
yield Arch(ARCH_AMD64), 0x0
else:
yield Arch(ARCH_ANY), 0x0
def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
for file_handler in FILE_HANDLERS:
for feature, va in file_handler(pe=pe): # type: ignore
yield feature, va
FILE_HANDLERS = (
extract_file_import_names,
# TODO extract_file_strings,
# TODO extract_file_function_names,
extract_file_format,
)
def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
for handler in GLOBAL_HANDLERS:
for feature, va in handler(pe=pe): # type: ignore
yield feature, va
GLOBAL_HANDLERS = (
extract_file_os,
extract_file_arch,
)
class DotnetFileFeatureExtractor(FeatureExtractor):
def __init__(self, path: str):
super(DotnetFileFeatureExtractor, self).__init__()
self.path: str = path
self.pe: dnfile.dnPE = dnfile.dnPE(path)
def get_base_address(self) -> int:
return 0x0
def get_entry_point(self) -> int:
# self.pe.net.Flags.CLT_NATIVE_ENTRYPOINT
# True: native EP: Token
# False: managed EP: RVA
return self.pe.net.struct.EntryPointTokenOrRva
def extract_global_features(self):
yield from extract_global_features(self.pe)
def extract_file_features(self):
yield from extract_file_features(self.pe)
def is_dotnet_file(self) -> bool:
return bool(self.pe.net)
def is_mixed_mode(self) -> bool:
return not bool(self.pe.net.Flags.CLR_ILONLY)
def get_runtime_version(self) -> Tuple[int, int]:
return self.pe.net.struct.MajorRuntimeVersion, self.pe.net.struct.MinorRuntimeVersion
def get_meta_version_string(self) -> str:
return self.pe.net.metadata.struct.Version.rstrip(b"\x00").decode("utf-8")
def get_functions(self):
raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features")
def extract_function_features(self, f):
raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features")
def get_basic_blocks(self, f):
raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features")
def extract_basic_block_features(self, f, bb):
raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features")
def get_instructions(self, f, bb):
raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features")
def extract_insn_features(self, f, bb, insn):
raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features")
def is_library_function(self, va):
raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features")
def get_function_name(self, va):
raise NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features")

View File

@@ -51,6 +51,9 @@ def generate_symbols(dll: str, symbol: str) -> Iterator[str]:
- CreateFileA
- CreateFile
"""
# normalize dll name
dll = dll.lower()
# kernel32.CreateFileA
yield "%s.%s" % (dll, symbol)

View File

@@ -14,6 +14,8 @@ import capa.features.extractors.helpers
import capa.features.extractors.ida.helpers
from capa.features.insn import API, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
# byte range within the first and returning basic blocks, this helps to reduce FP features

View File

@@ -10,6 +10,8 @@ from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
# byte range within the first and returning basic blocks, this helps to reduce FP features
@@ -271,10 +273,10 @@ def is_security_cookie(f, bb, insn):
for index, block in enumerate(f.getBlocks()):
# expect security cookie init in first basic block within first bytes (instructions)
block_instructions = [i for i in block.getInstructions()]
if index == 0 and ih.address < (block_instructions[0].offset + SECURITY_COOKIE_BYTES_DELTA):
if index == 0 and insn.address < (block_instructions[0].offset + SECURITY_COOKIE_BYTES_DELTA):
return True
# ... or within last bytes (instructions) before a return
if block_instructions[-1].mnemonic.startswith("ret") and ih.address > (
if block_instructions[-1].mnemonic.startswith("ret") and insn.address > (
block_instructions[-1].offset - SECURITY_COOKIE_BYTES_DELTA
):
return True

View File

@@ -19,10 +19,26 @@ import envi.archs.amd64.disasm
import capa.features.extractors.helpers
import capa.features.extractors.viv.helpers
<<<<<<< HEAD
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
||||||| de312d8
from capa.features.insn import API, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.common import (
BITNESS_X32,
BITNESS_X64,
MAX_BYTES_FEATURE_SIZE,
THUNK_CHAIN_DEPTH_DELTA,
Bytes,
String,
Characteristic,
)
=======
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
>>>>>>> 580a2d7e4519ea5d353650d66468020968f0f27d
from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
@@ -30,9 +46,23 @@ from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_i
SECURITY_COOKIE_BYTES_DELTA = 0x40
<<<<<<< HEAD
def interface_extract_instruction_XXX(
f: FunctionHandle, bb: BBHandle, insn: InsnHandle
) -> Iterator[Tuple[Feature, Address]]:
||||||| de312d8
def get_bitness(vw):
bitness = vw.getMeta("Architecture")
if bitness == "i386":
return BITNESS_X32
elif bitness == "amd64":
return BITNESS_X64
def interface_extract_instruction_XXX(f, bb, insn):
=======
def interface_extract_instruction_XXX(f, bb, insn):
>>>>>>> 580a2d7e4519ea5d353650d66468020968f0f27d
"""
parse features from the given instruction.

View File

@@ -13,11 +13,6 @@ from capa.features.common import Feature
class API(Feature):
def __init__(self, name: str, description=None):
# Downcase library name if given
if "." in name:
modname, _, impname = name.rpartition(".")
name = modname.lower() + "." + impname
super(API, self).__init__(name, description=description)

View File

@@ -43,6 +43,7 @@ import capa.features.extractors.common
import capa.features.extractors.pefile
import capa.features.extractors.dnfile_
import capa.features.extractors.elffile
import capa.features.extractors.dotnetfile
from capa.rules import Rule, Scope, RuleSet
from capa.engine import FeatureSet, MatchResults
from capa.helpers import (
@@ -64,6 +65,24 @@ from capa.features.common import (
FORMAT_FREEZE,
)
from capa.features.address import NO_ADDRESS
from capa.helpers import (
get_format,
get_file_taste,
get_auto_format,
log_unsupported_os_error,
log_unsupported_arch_error,
log_unsupported_format_error,
)
from capa.exceptions import UnsupportedOSError, UnsupportedArchError, UnsupportedFormatError, UnsupportedRuntimeError
from capa.features.common import (
FORMAT_PE,
FORMAT_ELF,
FORMAT_AUTO,
FORMAT_SC32,
FORMAT_SC64,
FORMAT_DOTNET,
FORMAT_FREEZE,
)
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
RULES_PATH_DEFAULT_STRING = "(embedded rules)"
@@ -505,8 +524,9 @@ def get_extractor(
raise UnsupportedOSError()
if format_ == FORMAT_DOTNET:
# TODO return capa.features.extractors.dotnet.extractor.DnFeatureExtractor(...)
raise NotImplementedError("DnFeatureExtractor")
import capa.features.extractors.dnfile.extractor
return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path)
if backend == "smda":
from smda.SmdaConfig import SmdaConfig
@@ -1058,7 +1078,7 @@ def main(argv=None):
logger.debug("file limitation short circuit, won't analyze fully.")
return E_FILE_LIMITATION
if isinstance(file_extractor, capa.features.extractors.dnfile_.DnfileFeatureExtractor):
if isinstance(file_extractor, capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor):
format_ = FORMAT_DOTNET
if format_ == FORMAT_FREEZE:

View File

@@ -122,6 +122,12 @@ def main(argv=None):
log_unsupported_runtime_error()
return -1
for feature, va in extractor.extract_global_features():
if va:
print("global: 0x%08x: %s" % (va, feature))
else:
print("global: 0x00000000: %s" % (feature))
if not args.function:
for feature, va in extractor.extract_file_features():
if va:

View File

@@ -27,6 +27,7 @@ requirements = [
"pefile==2021.9.3",
"pyelftools==0.28",
"dnfile==0.10.0",
"dncil==1.0.0",
]
# this sets __version__

View File

@@ -136,10 +136,17 @@ def get_pefile_extractor(path):
return capa.features.extractors.pefile.PefileFeatureExtractor(path)
def get_dnfile_extractor(path):
import capa.features.extractors.dnfile_
def get_dotnetfile_extractor(path):
import capa.features.extractors.dotnetfile
return capa.features.extractors.dnfile_.DnfileFeatureExtractor(path)
return capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(path)
@lru_cache(maxsize=1)
def get_dnfile_extractor(path):
import capa.features.extractors.dnfile.extractor
return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path)
def extract_global_features(extractor):
@@ -244,6 +251,10 @@ def get_data_path_by_name(name):
return os.path.join(CD, "data", "b9f5bd514485fb06da39beff051b9fdc.exe_")
elif name.startswith("mixed-mode-64"):
return os.path.join(DNFILE_TESTFILES, "mixed-mode", "ModuleCode", "bin", "ModuleCode_amd64.exe")
elif name.startswith("hello-world"):
return os.path.join(DNFILE_TESTFILES, "hello-world", "hello-world.exe")
elif name.startswith("_1c444"):
return os.path.join(CD, "data", "dotnet", "1c444ebeba24dcba8628b7dfe5fec7c6.exe_")
else:
raise ValueError("unexpected sample fixture: %s" % name)
@@ -660,6 +671,25 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted(
("mixed-mode-64", "file", Arch(ARCH_I386), False),
("b9f5b", "file", OS(OS_ANY), True),
("b9f5b", "file", Format(FORMAT_DOTNET), True),
("hello-world", "function=0x250", capa.features.common.String("Hello World!"), True),
("hello-world", "function=0x250, bb=0x250, insn=0x252", capa.features.common.String("Hello World!"), True),
("hello-world", "function=0x250", capa.features.insn.API("System.Console::WriteLine"), True),
("hello-world", "file", capa.features.file.Import("System.Console::WriteLine"), True),
("_1c444", "file", capa.features.file.Import("gdi32.CreateCompatibleBitmap"), True),
("_1c444", "file", capa.features.file.Import("CreateCompatibleBitmap"), True),
("_1c444", "file", capa.features.file.Import("gdi32::CreateCompatibleBitmap"), False),
("_1c444", "function=0x1F68", capa.features.insn.API("GetWindowDC"), True),
("_1c444", "function=0x1F68", capa.features.insn.API("user32.GetWindowDC"), True),
("_1c444", "function=0x1F68", capa.features.insn.Number(0xCC0020), True),
("_1c444", "function=0x1F68", capa.features.insn.Number(0x0), True),
("_1c444", "function=0x1F68", capa.features.insn.Number(0x1), False),
(
"_1c444",
"function=0x1F68, bb=0x1F68, insn=0x1FF9",
capa.features.insn.API("System.Drawing.Image::FromHbitmap"),
True,
),
("_1c444", "function=0x1F68, bb=0x1F68, insn=0x1FF9", capa.features.insn.API("FromHbitmap"), False),
],
# order tests by (file, item)
# so that our LRU cache is most effective.
@@ -681,6 +711,9 @@ FEATURE_COUNT_TESTS = [
]
FEATURE_COUNT_TESTS_DOTNET = [] # type: ignore
def do_test_feature_presence(get_extractor, sample, scope, feature, expected):
extractor = get_extractor(sample)
features = scope(extractor)
@@ -781,10 +814,20 @@ def pingtaest_extractor():
@pytest.fixture
def b9f5b_dnfile_extractor():
return get_dnfile_extractor(get_data_path_by_name("b9f5b"))
def b9f5b_dotnetfile_extractor():
return get_dotnetfile_extractor(get_data_path_by_name("b9f5b"))
@pytest.fixture
def mixed_mode_64_dnfile_extractor():
return get_dnfile_extractor(get_data_path_by_name("mixed-mode-64"))
def mixed_mode_64_dotnetfile_extractor():
return get_dotnetfile_extractor(get_data_path_by_name("mixed-mode-64"))
@pytest.fixture
def hello_world_dnfile_extractor():
return get_dnfile_extractor(get_data_path_by_name("hello-world"))
@pytest.fixture
def _1c444_dnfile_extractor():
return get_dnfile_extractor(get_data_path_by_name("1c444..."))

View File

@@ -0,0 +1,30 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import pytest
import fixtures
from fixtures import *
from fixtures import parametrize
@parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_PRESENCE_TESTS_DOTNET,
indirect=["sample", "scope"],
)
def test_dnfile_features(sample, scope, feature, expected):
fixtures.do_test_feature_presence(fixtures.get_dnfile_extractor, sample, scope, feature, expected)
@parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_COUNT_TESTS_DOTNET,
indirect=["sample", "scope"],
)
def test_dnfile_feature_counts(sample, scope, feature, expected):
fixtures.do_test_feature_count(fixtures.get_dnfile_extractor, sample, scope, feature, expected)

View File

@@ -0,0 +1,43 @@
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import pytest
import fixtures
from fixtures import *
from fixtures import parametrize
@parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_PRESENCE_TESTS_DOTNET,
indirect=["sample", "scope"],
)
def test_dotnetfile_features(sample, scope, feature, expected):
if scope.__name__ != "file":
pytest.xfail("dotnetfile only extracts file scope features")
if isinstance(feature, capa.features.file.FunctionName):
pytest.xfail("dotnetfile doesn't extract function names")
fixtures.do_test_feature_presence(fixtures.get_dotnetfile_extractor, sample, scope, feature, expected)
@parametrize(
"extractor,function,expected",
[
("b9f5b_dotnetfile_extractor", "is_dotnet_file", True),
("b9f5b_dotnetfile_extractor", "is_mixed_mode", False),
("mixed_mode_64_dotnetfile_extractor", "is_mixed_mode", True),
("b9f5b_dotnetfile_extractor", "get_entry_point", 0x6000007),
("b9f5b_dotnetfile_extractor", "get_runtime_version", (2, 5)),
("b9f5b_dotnetfile_extractor", "get_meta_version_string", "v2.0.50727"),
],
)
def test_dotnetfile_extractor(request, extractor, function, expected):
extractor_function = getattr(request.getfixturevalue(extractor), function)
assert extractor_function() == expected