mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 15:49:46 -08:00
Merge branch 'master' of github.com:mandiant/capa into feature-981
This commit is contained in:
3
.github/mypy/mypy.ini
vendored
3
.github/mypy/mypy.ini
vendored
@@ -74,3 +74,6 @@ ignore_missing_imports = True
|
||||
|
||||
[mypy-elftools.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-dncil.*]
|
||||
ignore_missing_imports = True
|
||||
@@ -9,6 +9,8 @@
|
||||
- add new feature "operand[{0, 1, 2}].offset" for matching instruction operand offsets #767 @williballenthin
|
||||
- main: detect dotnet binaries #955 @mr-tz
|
||||
- render: support Addresses that aren't simple integers, like .NET token+offset #981 @williballenthin
|
||||
- extract additional offset/number features in certain circumstances #320 @williballenthin
|
||||
- add detection and basic feature extraction for dotnet #987 @mr-tz, @mike-hunhoff, @williballenthin
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
|
||||
@@ -12,6 +12,8 @@ import capa.features.extractors.pefile
|
||||
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, FORMAT_FREEZE, Arch, Format, String, Feature
|
||||
from capa.features.freeze import is_freeze
|
||||
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress
|
||||
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, FORMAT_FREEZE, Arch, Format, String
|
||||
from capa.features.freeze import is_freeze
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
0
capa/features/extractors/dnfile/__init__.py
Normal file
0
capa/features/extractors/dnfile/__init__.py
Normal file
70
capa/features/extractors/dnfile/extractor.py
Normal file
70
capa/features/extractors/dnfile/extractor.py
Normal file
@@ -0,0 +1,70 @@
|
||||
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, List, Tuple
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from capa.features.common import Feature
|
||||
|
||||
import dnfile
|
||||
|
||||
import capa.features.extractors
|
||||
import capa.features.extractors.dnfile.file
|
||||
import capa.features.extractors.dnfile.insn
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
from capa.features.extractors.dnfile.helpers import get_dotnet_managed_method_bodies
|
||||
|
||||
|
||||
class DnfileFeatureExtractor(FeatureExtractor):
    """feature extractor for .NET modules, using dnfile to parse the PE and its metadata.

    each managed method body is treated as one function that contains exactly one basic block
    (the method body itself); features are extracted at the file and instruction scopes.
    """

    def __init__(self, path: str):
        """parse the module at `path` and pre-compute the global (OS and arch) features."""
        # import the submodule explicitly: this module only imports the `capa.features.extractors`
        # package, so `capa.features.extractors.dotnetfile` would otherwise resolve only when some
        # other module happened to import it first.
        import capa.features.extractors.dotnetfile

        super().__init__()
        self.pe: dnfile.dnPE = dnfile.dnPE(path)

        # pre-compute these because we'll yield them at *every* scope.
        self.global_features: List[Tuple[Feature, int]] = []
        self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe))
        self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe))

    def get_base_address(self):
        """the base address is always reported as zero."""
        return 0x0

    def extract_global_features(self):
        """yield the (feature, address) pairs computed once in the constructor."""
        yield from self.global_features

    def extract_file_features(self):
        """yield file-scope features, delegating to the dnfile `file` module."""
        yield from capa.features.extractors.dnfile.file.extract_features(self.pe)

    def get_functions(self):
        """yield each managed method body, tagged with a shared analysis context."""
        # data structure shared across functions yielded here.
        # useful for caching analysis relevant across a single workspace.
        ctx = {"pe": self.pe}

        for f in get_dotnet_managed_method_bodies(self.pe):
            # the instruction handlers read the context back via `f.ctx`
            f.ctx = ctx
            yield f

    def extract_function_features(self, f):
        """function-scope features are not extracted yet; yields nothing."""
        # TODO
        yield from []

    def get_basic_blocks(self, f):
        """yield the method body itself as its only basic block."""
        # each dotnet method is considered 1 basic block
        yield f

    def extract_basic_block_features(self, f, bb):
        """yields nothing."""
        # we don't support basic block features
        yield from []

    def get_instructions(self, f, bb):
        """yield the instructions of the method body `f`."""
        yield from f.instructions

    def extract_insn_features(self, f, bb, insn):
        """yield instruction-scope features, delegating to the dnfile `insn` module."""
        yield from capa.features.extractors.dnfile.insn.extract_features(f, bb, insn)
|
||||
40
capa/features/extractors/dnfile/file.py
Normal file
40
capa/features/extractors/dnfile/file.py
Normal file
@@ -0,0 +1,40 @@
|
||||
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Tuple, Iterator
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import dnfile
|
||||
from capa.features.common import Feature, Format
|
||||
from capa.features.file import Import
|
||||
|
||||
import capa.features.extractors
|
||||
|
||||
|
||||
def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, int]]:
    """yield the import features produced by the shared `dotnetfile` implementation."""
    # import the submodule explicitly: the module-level `import capa.features.extractors` does not
    # load `dotnetfile`, so the attribute lookup below would otherwise depend on import order.
    import capa.features.extractors.dotnetfile

    yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe)
|
||||
|
||||
|
||||
def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, int]]:
    """yield the format feature produced by the shared `dotnetfile` implementation."""
    # import the submodule explicitly: the module-level `import capa.features.extractors` does not
    # load `dotnetfile`, so the attribute lookup below would otherwise depend on import order.
    import capa.features.extractors.dotnetfile

    yield from capa.features.extractors.dotnetfile.extract_file_format(pe=pe)
|
||||
|
||||
|
||||
def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
    """run every handler in FILE_HANDLERS against `pe` and yield its (feature, token) pairs."""
    for handler in FILE_HANDLERS:
        for pair in handler(pe):
            feature, token = pair
            yield feature, token


# file-scope handlers, invoked in this order by extract_features
FILE_HANDLERS = (
    extract_file_import_names,
    # TODO extract_file_strings,
    # TODO extract_file_function_names,
    extract_file_format,
)
|
||||
169
capa/features/extractors/dnfile/helpers.py
Normal file
169
capa/features/extractors/dnfile/helpers.py
Normal file
@@ -0,0 +1,169 @@
|
||||
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, Tuple, Iterator, Optional
|
||||
|
||||
import dnfile
|
||||
from dncil.cil.body import CilMethodBody
|
||||
from dncil.cil.error import MethodBodyFormatError
|
||||
from dncil.clr.token import Token, StringToken, InvalidToken
|
||||
from dncil.cil.body.reader import CilMethodBodyReaderBase
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# key indexes to dotnet metadata tables
# maps each member's value in dnfile.enums.MetadataTables to that member's name
DOTNET_META_TABLES_BY_INDEX = {table.value: table.name for table in dnfile.enums.MetadataTables}
|
||||
|
||||
|
||||
class DnfileMethodBodyReader(CilMethodBodyReaderBase):
    """byte reader over a dnfile-parsed PE, positioned at a MethodDef row's body.

    the cursor starts at the offset that `pe.get_offset_from_rva` returns for the row's RVA.
    """

    def __init__(self, pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow):
        self.pe: dnfile.dnPE = pe
        start = pe.get_offset_from_rva(row.Rva)
        self.offset: int = start

    def read(self, n: int) -> bytes:
        """fetch `n` bytes at the cursor and advance the cursor by `n`."""
        # the PE data accessor is addressed by RVA, so convert the cursor back first
        rva = self.pe.get_rva_from_offset(self.offset)
        chunk: bytes = self.pe.get_data(rva, n)
        self.offset += n
        return chunk

    def tell(self) -> int:
        """current cursor position."""
        return self.offset

    def seek(self, offset: int) -> int:
        """move the cursor to `offset` and return the new position."""
        self.offset = offset
        return self.offset
|
||||
|
||||
|
||||
def calculate_dotnet_token_value(table: int, rid: int) -> int:
    """pack a table index and a row id into a single token value.

    the low byte of `table` is shifted left by Token.TABLE_SHIFT and combined with
    `rid` masked by Token.RID_MASK.
    """
    table_bits = (table & 0xFF) << Token.TABLE_SHIFT
    rid_bits = rid & Token.RID_MASK
    return table_bits | rid_bits
|
||||
|
||||
|
||||
def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Any:
    """map generic token to string or table row

    returns the user string for a StringToken, the metadata table row for any other token,
    or InvalidToken (carrying the original token value) when the token cannot be resolved.
    """
    if isinstance(token, StringToken):
        user_string: Optional[str] = read_dotnet_user_string(pe, token)
        if user_string is None:
            return InvalidToken(token.value)
        return user_string

    table_name: str = DOTNET_META_TABLES_BY_INDEX.get(token.table, "")
    if not table_name:
        # table_index is not valid
        return InvalidToken(token.value)

    table: Any = getattr(pe.net.mdtables, table_name, None)
    if table is None:
        # table index is valid but table is not present
        return InvalidToken(token.value)

    if token.rid < 1:
        # row ids are 1-based; without this guard a rid of 0 would index rows[-1]
        # and silently resolve to the last row of the table
        return InvalidToken(token.value)

    try:
        return table.rows[token.rid - 1]
    except IndexError:
        # table index is valid but row index is not valid
        return InvalidToken(token.value)
|
||||
|
||||
|
||||
def read_dotnet_method_body(pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow) -> Optional[CilMethodBody]:
    """read dotnet method body

    returns the parsed CilMethodBody, or None (after logging a warning) when dncil
    reports a MethodBodyFormatError.
    """
    try:
        return CilMethodBody(DnfileMethodBodyReader(pe, row))
    except MethodBodyFormatError as e:
        # Logger.warn is a deprecated alias; pass the args so formatting is deferred to the logger
        logger.warning("failed to parse managed method body @ 0x%08x (%s)", row.Rva, e)
        return None
|
||||
|
||||
|
||||
def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str]:
    """read user string from #US stream

    returns the string's value, or None when the lookup yields nothing or the
    entry cannot be decoded (a warning is logged in the latter case).
    """
    try:
        user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid)
    except UnicodeDecodeError as e:
        # Logger.warn is a deprecated alias; pass the args so formatting is deferred to the logger
        logger.warning("failed to decode #US stream index 0x%08x (%s)", token.rid, e)
        return None

    if user_string is None:
        return None

    return user_string.value
|
||||
|
||||
|
||||
def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
    """get managed imports from MemberRef table

    yields (token, name) for each MemberRef row whose Class refers to a TypeRef row;
    the name looks like System.IO.File::OpenRead.

    see https://www.ntcore.com/files/dotnetformat.htm

    10 - MemberRef Table
        Each row represents an imported method
            Class (index into the TypeRef, ModuleRef, MethodDef, TypeSpec or TypeDef tables)
            Name (index into String heap)
    01 - TypeRef Table
        Each row represents an imported class, its namespace and the assembly which contains it
            TypeName (index into String heap)
            TypeNamespace (index into String heap)
    """
    if not hasattr(pe.net.mdtables, "MemberRef"):
        return

    member_ref_table: int = dnfile.enums.MetadataTables.MemberRef.value

    # row ids are 1-based, so count from 1
    for rid, row in enumerate(pe.net.mdtables.MemberRef, start=1):
        parent = row.Class.row
        if not isinstance(parent, dnfile.mdtable.TypeRefRow):
            continue

        token: int = calculate_dotnet_token_value(member_ref_table, rid)

        # like System.IO.File::OpenRead
        yield token, f"{parent.TypeNamespace}.{parent.TypeName}::{row.Name}"
|
||||
|
||||
|
||||
def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
    """get unmanaged imports from ImplMap table

    yields (token, name) for each ImplMap row; the name looks like kernel32.CreateFileA.

    see https://www.ntcore.com/files/dotnetformat.htm

    28 - ImplMap Table
        ImplMap table holds information about unmanaged methods that can be reached from managed code, using PInvoke dispatch
            MemberForwarded (index into the Field or MethodDef table; more precisely, a MemberForwarded coded index)
            ImportName (index into the String heap)
            ImportScope (index into the ModuleRef table)
    """
    if not hasattr(pe.net.mdtables, "ImplMap"):
        return

    for row in pe.net.mdtables.ImplMap:
        module_name: str = row.ImportScope.row.Name
        symbol: str = row.ImportName

        # ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the
        # name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; so we calculate and map the MemberForwarded
        # MethodDef table token to help us later record native import method calls made from CIL
        forwarded = row.MemberForwarded
        token: int = calculate_dotnet_token_value(forwarded.table.number, forwarded.row_index)

        # like Kernel32.dll: keep only the part before the first dot
        if module_name and "." in module_name:
            module_name, _, _ = module_name.partition(".")

        # like kernel32.CreateFileA
        yield token, f"{module_name}.{symbol}"
|
||||
|
||||
|
||||
def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[CilMethodBody]:
    """get managed methods from MethodDef table

    yields the parsed body of each IL method that is neither abstract nor PInvoke;
    methods whose body fails to parse are skipped.
    """
    if not hasattr(pe.net.mdtables, "MethodDef"):
        return

    for row in pe.net.mdtables.MethodDef:
        flags = row.Flags
        has_il_body = row.ImplFlags.miIL and not (flags.mdAbstract or flags.mdPinvokeImpl)
        if not has_il_body:
            # skip methods that do not have a method body
            continue

        body: Optional[CilMethodBody] = read_dotnet_method_body(pe, row)
        if body is not None:
            yield body
|
||||
96
capa/features/extractors/dnfile/insn.py
Normal file
96
capa/features/extractors/dnfile/insn.py
Normal file
@@ -0,0 +1,96 @@
|
||||
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Dict, Tuple, Iterator, Optional
|
||||
from itertools import chain
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from dncil.cil.instruction import Instruction
|
||||
from dncil.cil.body import CilMethodBody
|
||||
from capa.features.common import Feature
|
||||
|
||||
from dncil.clr.token import StringToken
|
||||
from dncil.cil.opcode import OpCodes
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
from capa.features.insn import API, Number
|
||||
from capa.features.common import String
|
||||
from capa.features.extractors.dnfile.helpers import (
|
||||
read_dotnet_user_string,
|
||||
get_dotnet_managed_imports,
|
||||
get_dotnet_unmanaged_imports,
|
||||
)
|
||||
|
||||
|
||||
def get_imports(ctx: Dict) -> Dict:
    """return the token -> import name mapping for ctx["pe"], building it on first use.

    the mapping is stored under ctx["imports_cache"] and reused on later calls.
    """
    try:
        return ctx["imports_cache"]
    except KeyError:
        pass

    pe = ctx["pe"]
    cache = {}
    for token, imp in chain(get_dotnet_managed_imports(pe), get_dotnet_unmanaged_imports(pe)):
        cache[token] = imp

    ctx["imports_cache"] = cache
    return cache
|
||||
|
||||
|
||||
def extract_insn_api_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[API, int]]:
    """parse instruction API features

    only Call, Callvirt, Jmp and Calli instructions whose operand value is a known import are considered.
    """
    call_opcodes = (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli)
    if insn.opcode not in call_opcodes:
        return

    imports = get_imports(f.ctx)
    name: str = imports.get(insn.operand.value, "")
    if not name:
        return

    if "::" in name:
        # like System.IO.File::OpenRead
        yield API(name), insn.offset
        return

    # like kernel32.CreateFileA
    dll, _, symbol = name.rpartition(".")
    for variant in capa.features.extractors.helpers.generate_symbols(dll, symbol):
        yield API(variant), insn.offset
|
||||
|
||||
|
||||
def extract_insn_number_features(
    f: CilMethodBody, bb: CilMethodBody, insn: Instruction
) -> Iterator[Tuple[Number, int]]:
    """parse instruction number features

    yields the value reported by `insn.get_ldc()` when `insn.is_ldc()` is true.
    """
    if not insn.is_ldc():
        return

    yield Number(insn.get_ldc()), insn.offset
|
||||
|
||||
|
||||
def extract_insn_string_features(
    f: CilMethodBody, bb: CilMethodBody, insn: Instruction
) -> Iterator[Tuple[String, int]]:
    """parse instruction string features

    yields the user string referenced by an ldstr instruction, when it can be read.
    """
    if not (insn.is_ldstr() and isinstance(insn.operand, StringToken)):
        return

    text: Optional[str] = read_dotnet_user_string(f.ctx["pe"], insn.operand)
    if text is not None:
        yield String(text), insn.offset
|
||||
|
||||
|
||||
def extract_features(f: CilMethodBody, bb: CilMethodBody, insn: Instruction) -> Iterator[Tuple[Feature, int]]:
    """extract instruction features

    runs every handler in INSTRUCTION_HANDLERS and yields its (feature, offset) pairs.
    """
    for handler in INSTRUCTION_HANDLERS:
        for pair in handler(f, bb, insn):
            feature, offset = pair
            yield feature, offset


# instruction-scope handlers, invoked in this order by extract_features
INSTRUCTION_HANDLERS = (
    extract_insn_api_features,
    extract_insn_number_features,
    extract_insn_string_features,
)
|
||||
129
capa/features/extractors/dotnetfile.py
Normal file
129
capa/features/extractors/dotnetfile.py
Normal file
@@ -0,0 +1,129 @@
|
||||
import logging
|
||||
from typing import Tuple, Iterator
|
||||
from itertools import chain
|
||||
|
||||
import dnfile
|
||||
import pefile
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
from capa.features.file import Import
|
||||
from capa.features.common import OS, OS_ANY, ARCH_ANY, ARCH_I386, ARCH_AMD64, FORMAT_DOTNET, Arch, Format, Feature
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
from capa.features.extractors.dnfile.helpers import get_dotnet_managed_imports, get_dotnet_unmanaged_imports
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_file_format(**kwargs) -> Iterator[Tuple[Format, int]]:
    """yield the dotnet format feature at address 0; keyword arguments are ignored."""
    fmt = Format(FORMAT_DOTNET)
    yield fmt, 0x0
|
||||
|
||||
|
||||
def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, int]]:
    """yield Import features for managed and unmanaged imports, each paired with its token.

    managed names (containing "::") are emitted as-is; unmanaged names are expanded
    into the variants produced by `generate_symbols`.
    """
    all_imports = chain(get_dotnet_managed_imports(pe), get_dotnet_unmanaged_imports(pe))
    for token, imp in all_imports:
        if "::" in imp:
            # like System.IO.File::OpenRead
            yield Import(imp), token
            continue

        # like kernel32.CreateFileA
        dll, _, symbol = imp.rpartition(".")
        for variant in capa.features.extractors.helpers.generate_symbols(dll, symbol):
            yield Import(variant), token
|
||||
|
||||
|
||||
def extract_file_os(**kwargs) -> Iterator[Tuple[OS, int]]:
    """yield the "any OS" feature at address 0; keyword arguments are ignored."""
    any_os = OS(OS_ANY)
    yield any_os, 0x0
|
||||
|
||||
|
||||
def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, int]]:
    """yield one Arch feature at address 0, chosen from the 32-bit-required flag and the PE type."""
    # to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020
    # .NET 4.5 added option: any CPU, 32-bit preferred
    requires_32bit = pe.net.Flags.CLR_32BITREQUIRED

    if requires_32bit and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE:
        arch = ARCH_I386
    elif not requires_32bit and pe.PE_TYPE == pefile.OPTIONAL_HEADER_MAGIC_PE_PLUS:
        arch = ARCH_AMD64
    else:
        arch = ARCH_ANY

    yield Arch(arch), 0x0
|
||||
|
||||
|
||||
def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
    """run every handler in FILE_HANDLERS with `pe=pe` and yield its (feature, address) pairs."""
    for handler in FILE_HANDLERS:
        for pair in handler(pe=pe):  # type: ignore
            feature, va = pair
            yield feature, va


# file-scope handlers, invoked in this order by extract_file_features
FILE_HANDLERS = (
    extract_file_import_names,
    # TODO extract_file_strings,
    # TODO extract_file_function_names,
    extract_file_format,
)
|
||||
|
||||
|
||||
def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
    """run every handler in GLOBAL_HANDLERS with `pe=pe` and yield its (feature, address) pairs."""
    for global_handler in GLOBAL_HANDLERS:
        for pair in global_handler(pe=pe):  # type: ignore
            feature, va = pair
            yield feature, va


# global-scope handlers, invoked in this order by extract_global_features
GLOBAL_HANDLERS = (
    extract_file_os,
    extract_file_arch,
)
|
||||
|
||||
|
||||
class DotnetFileFeatureExtractor(FeatureExtractor):
    """file-scope-only feature extractor for .NET modules parsed with dnfile.

    global and file features are supported; every function, basic block and
    instruction API raises NotImplementedError.
    """

    def __init__(self, path: str):
        super().__init__()
        self.path: str = path
        self.pe: dnfile.dnPE = dnfile.dnPE(path)

    @staticmethod
    def _file_scope_only() -> NotImplementedError:
        """build the error raised by every API that needs more than file scope."""
        return NotImplementedError("DotnetFileFeatureExtractor can only be used to extract file features")

    def get_base_address(self) -> int:
        """the base address is always reported as zero."""
        return 0x0

    def get_entry_point(self) -> int:
        """return the raw EntryPointTokenOrRva field of the CLR header."""
        # the field's meaning depends on self.pe.net.Flags.CLR_NATIVE_ENTRYPOINT (per ECMA-335 / CorHdr.h):
        #   set:   native EP: RVA
        #   clear: managed EP: metadata token
        return self.pe.net.struct.EntryPointTokenOrRva

    def extract_global_features(self):
        """yield the module-level global features for this file."""
        yield from extract_global_features(self.pe)

    def extract_file_features(self):
        """yield the module-level file features for this file."""
        yield from extract_file_features(self.pe)

    def is_dotnet_file(self) -> bool:
        """true when dnfile exposes a truthy `net` attribute for the file."""
        return bool(self.pe.net)

    def is_mixed_mode(self) -> bool:
        """true when the CLR_ILONLY flag is not set."""
        return not self.pe.net.Flags.CLR_ILONLY

    def get_runtime_version(self) -> Tuple[int, int]:
        """(major, minor) runtime version fields of the CLR header."""
        header = self.pe.net.struct
        return header.MajorRuntimeVersion, header.MinorRuntimeVersion

    def get_meta_version_string(self) -> str:
        """metadata version string, with trailing NUL bytes removed, decoded as UTF-8."""
        raw = self.pe.net.metadata.struct.Version
        return raw.rstrip(b"\x00").decode("utf-8")

    def get_functions(self):
        raise self._file_scope_only()

    def extract_function_features(self, f):
        raise self._file_scope_only()

    def get_basic_blocks(self, f):
        raise self._file_scope_only()

    def extract_basic_block_features(self, f, bb):
        raise self._file_scope_only()

    def get_instructions(self, f, bb):
        raise self._file_scope_only()

    def extract_insn_features(self, f, bb, insn):
        raise self._file_scope_only()

    def is_library_function(self, va):
        raise self._file_scope_only()

    def get_function_name(self, va):
        raise self._file_scope_only()
|
||||
@@ -51,6 +51,9 @@ def generate_symbols(dll: str, symbol: str) -> Iterator[str]:
|
||||
- CreateFileA
|
||||
- CreateFile
|
||||
"""
|
||||
# normalize dll name
|
||||
dll = dll.lower()
|
||||
|
||||
# kernel32.CreateFileA
|
||||
yield "%s.%s" % (dll, symbol)
|
||||
|
||||
|
||||
@@ -14,6 +14,8 @@ import capa.features.extractors.helpers
|
||||
import capa.features.extractors.ida.helpers
|
||||
from capa.features.insn import API, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
|
||||
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
|
||||
|
||||
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
|
||||
# byte range within the first and returning basic blocks, this helps to reduce FP features
|
||||
|
||||
@@ -10,6 +10,8 @@ from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
|
||||
|
||||
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
|
||||
# byte range within the first and returning basic blocks, this helps to reduce FP features
|
||||
@@ -271,10 +273,10 @@ def is_security_cookie(f, bb, insn):
|
||||
for index, block in enumerate(f.getBlocks()):
|
||||
# expect security cookie init in first basic block within first bytes (instructions)
|
||||
block_instructions = [i for i in block.getInstructions()]
|
||||
if index == 0 and ih.address < (block_instructions[0].offset + SECURITY_COOKIE_BYTES_DELTA):
|
||||
if index == 0 and insn.address < (block_instructions[0].offset + SECURITY_COOKIE_BYTES_DELTA):
|
||||
return True
|
||||
# ... or within last bytes (instructions) before a return
|
||||
if block_instructions[-1].mnemonic.startswith("ret") and ih.address > (
|
||||
if block_instructions[-1].mnemonic.startswith("ret") and insn.address > (
|
||||
block_instructions[-1].offset - SECURITY_COOKIE_BYTES_DELTA
|
||||
):
|
||||
return True
|
||||
|
||||
@@ -19,10 +19,26 @@ import envi.archs.amd64.disasm
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
import capa.features.extractors.viv.helpers
|
||||
<<<<<<< HEAD
|
||||
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Feature, Characteristic
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||
||||||| de312d8
|
||||
from capa.features.insn import API, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
||||
from capa.features.common import (
|
||||
BITNESS_X32,
|
||||
BITNESS_X64,
|
||||
MAX_BYTES_FEATURE_SIZE,
|
||||
THUNK_CHAIN_DEPTH_DELTA,
|
||||
Bytes,
|
||||
String,
|
||||
Characteristic,
|
||||
)
|
||||
=======
|
||||
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, THUNK_CHAIN_DEPTH_DELTA, Bytes, String, Characteristic
|
||||
>>>>>>> 580a2d7e4519ea5d353650d66468020968f0f27d
|
||||
from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_indirect_call
|
||||
|
||||
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
|
||||
@@ -30,9 +46,23 @@ from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_i
|
||||
SECURITY_COOKIE_BYTES_DELTA = 0x40
|
||||
|
||||
|
||||
<<<<<<< HEAD
|
||||
def interface_extract_instruction_XXX(
|
||||
f: FunctionHandle, bb: BBHandle, insn: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
||||||| de312d8
|
||||
def get_bitness(vw):
|
||||
bitness = vw.getMeta("Architecture")
|
||||
if bitness == "i386":
|
||||
return BITNESS_X32
|
||||
elif bitness == "amd64":
|
||||
return BITNESS_X64
|
||||
|
||||
|
||||
def interface_extract_instruction_XXX(f, bb, insn):
|
||||
=======
|
||||
def interface_extract_instruction_XXX(f, bb, insn):
|
||||
>>>>>>> 580a2d7e4519ea5d353650d66468020968f0f27d
|
||||
"""
|
||||
parse features from the given instruction.
|
||||
|
||||
|
||||
@@ -13,11 +13,6 @@ from capa.features.common import Feature
|
||||
|
||||
class API(Feature):
|
||||
def __init__(self, name: str, description=None):
|
||||
# Downcase library name if given
|
||||
if "." in name:
|
||||
modname, _, impname = name.rpartition(".")
|
||||
name = modname.lower() + "." + impname
|
||||
|
||||
super(API, self).__init__(name, description=description)
|
||||
|
||||
|
||||
|
||||
26
capa/main.py
26
capa/main.py
@@ -43,6 +43,7 @@ import capa.features.extractors.common
|
||||
import capa.features.extractors.pefile
|
||||
import capa.features.extractors.dnfile_
|
||||
import capa.features.extractors.elffile
|
||||
import capa.features.extractors.dotnetfile
|
||||
from capa.rules import Rule, Scope, RuleSet
|
||||
from capa.engine import FeatureSet, MatchResults
|
||||
from capa.helpers import (
|
||||
@@ -64,6 +65,24 @@ from capa.features.common import (
|
||||
FORMAT_FREEZE,
|
||||
)
|
||||
from capa.features.address import NO_ADDRESS
|
||||
from capa.helpers import (
|
||||
get_format,
|
||||
get_file_taste,
|
||||
get_auto_format,
|
||||
log_unsupported_os_error,
|
||||
log_unsupported_arch_error,
|
||||
log_unsupported_format_error,
|
||||
)
|
||||
from capa.exceptions import UnsupportedOSError, UnsupportedArchError, UnsupportedFormatError, UnsupportedRuntimeError
|
||||
from capa.features.common import (
|
||||
FORMAT_PE,
|
||||
FORMAT_ELF,
|
||||
FORMAT_AUTO,
|
||||
FORMAT_SC32,
|
||||
FORMAT_SC64,
|
||||
FORMAT_DOTNET,
|
||||
FORMAT_FREEZE,
|
||||
)
|
||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor
|
||||
|
||||
RULES_PATH_DEFAULT_STRING = "(embedded rules)"
|
||||
@@ -505,8 +524,9 @@ def get_extractor(
|
||||
raise UnsupportedOSError()
|
||||
|
||||
if format_ == FORMAT_DOTNET:
|
||||
# TODO return capa.features.extractors.dotnet.extractor.DnFeatureExtractor(...)
|
||||
raise NotImplementedError("DnFeatureExtractor")
|
||||
import capa.features.extractors.dnfile.extractor
|
||||
|
||||
return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path)
|
||||
|
||||
if backend == "smda":
|
||||
from smda.SmdaConfig import SmdaConfig
|
||||
@@ -1058,7 +1078,7 @@ def main(argv=None):
|
||||
logger.debug("file limitation short circuit, won't analyze fully.")
|
||||
return E_FILE_LIMITATION
|
||||
|
||||
if isinstance(file_extractor, capa.features.extractors.dnfile_.DnfileFeatureExtractor):
|
||||
if isinstance(file_extractor, capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor):
|
||||
format_ = FORMAT_DOTNET
|
||||
|
||||
if format_ == FORMAT_FREEZE:
|
||||
|
||||
@@ -122,6 +122,12 @@ def main(argv=None):
|
||||
log_unsupported_runtime_error()
|
||||
return -1
|
||||
|
||||
for feature, va in extractor.extract_global_features():
|
||||
if va:
|
||||
print("global: 0x%08x: %s" % (va, feature))
|
||||
else:
|
||||
print("global: 0x00000000: %s" % (feature))
|
||||
|
||||
if not args.function:
|
||||
for feature, va in extractor.extract_file_features():
|
||||
if va:
|
||||
|
||||
1
setup.py
1
setup.py
@@ -27,6 +27,7 @@ requirements = [
|
||||
"pefile==2021.9.3",
|
||||
"pyelftools==0.28",
|
||||
"dnfile==0.10.0",
|
||||
"dncil==1.0.0",
|
||||
]
|
||||
|
||||
# this sets __version__
|
||||
|
||||
@@ -136,10 +136,17 @@ def get_pefile_extractor(path):
|
||||
return capa.features.extractors.pefile.PefileFeatureExtractor(path)
|
||||
|
||||
|
||||
def get_dnfile_extractor(path):
|
||||
import capa.features.extractors.dnfile_
|
||||
def get_dotnetfile_extractor(path):
|
||||
import capa.features.extractors.dotnetfile
|
||||
|
||||
return capa.features.extractors.dnfile_.DnfileFeatureExtractor(path)
|
||||
return capa.features.extractors.dotnetfile.DotnetFileFeatureExtractor(path)
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def get_dnfile_extractor(path):
|
||||
import capa.features.extractors.dnfile.extractor
|
||||
|
||||
return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(path)
|
||||
|
||||
|
||||
def extract_global_features(extractor):
|
||||
@@ -244,6 +251,10 @@ def get_data_path_by_name(name):
|
||||
return os.path.join(CD, "data", "b9f5bd514485fb06da39beff051b9fdc.exe_")
|
||||
elif name.startswith("mixed-mode-64"):
|
||||
return os.path.join(DNFILE_TESTFILES, "mixed-mode", "ModuleCode", "bin", "ModuleCode_amd64.exe")
|
||||
elif name.startswith("hello-world"):
|
||||
return os.path.join(DNFILE_TESTFILES, "hello-world", "hello-world.exe")
|
||||
elif name.startswith("_1c444"):
|
||||
return os.path.join(CD, "data", "dotnet", "1c444ebeba24dcba8628b7dfe5fec7c6.exe_")
|
||||
else:
|
||||
raise ValueError("unexpected sample fixture: %s" % name)
|
||||
|
||||
@@ -660,6 +671,25 @@ FEATURE_PRESENCE_TESTS_DOTNET = sorted(
|
||||
("mixed-mode-64", "file", Arch(ARCH_I386), False),
|
||||
("b9f5b", "file", OS(OS_ANY), True),
|
||||
("b9f5b", "file", Format(FORMAT_DOTNET), True),
|
||||
("hello-world", "function=0x250", capa.features.common.String("Hello World!"), True),
|
||||
("hello-world", "function=0x250, bb=0x250, insn=0x252", capa.features.common.String("Hello World!"), True),
|
||||
("hello-world", "function=0x250", capa.features.insn.API("System.Console::WriteLine"), True),
|
||||
("hello-world", "file", capa.features.file.Import("System.Console::WriteLine"), True),
|
||||
("_1c444", "file", capa.features.file.Import("gdi32.CreateCompatibleBitmap"), True),
|
||||
("_1c444", "file", capa.features.file.Import("CreateCompatibleBitmap"), True),
|
||||
("_1c444", "file", capa.features.file.Import("gdi32::CreateCompatibleBitmap"), False),
|
||||
("_1c444", "function=0x1F68", capa.features.insn.API("GetWindowDC"), True),
|
||||
("_1c444", "function=0x1F68", capa.features.insn.API("user32.GetWindowDC"), True),
|
||||
("_1c444", "function=0x1F68", capa.features.insn.Number(0xCC0020), True),
|
||||
("_1c444", "function=0x1F68", capa.features.insn.Number(0x0), True),
|
||||
("_1c444", "function=0x1F68", capa.features.insn.Number(0x1), False),
|
||||
(
|
||||
"_1c444",
|
||||
"function=0x1F68, bb=0x1F68, insn=0x1FF9",
|
||||
capa.features.insn.API("System.Drawing.Image::FromHbitmap"),
|
||||
True,
|
||||
),
|
||||
("_1c444", "function=0x1F68, bb=0x1F68, insn=0x1FF9", capa.features.insn.API("FromHbitmap"), False),
|
||||
],
|
||||
# order tests by (file, item)
|
||||
# so that our LRU cache is most effective.
|
||||
@@ -681,6 +711,9 @@ FEATURE_COUNT_TESTS = [
|
||||
]
|
||||
|
||||
|
||||
FEATURE_COUNT_TESTS_DOTNET = [] # type: ignore
|
||||
|
||||
|
||||
def do_test_feature_presence(get_extractor, sample, scope, feature, expected):
|
||||
extractor = get_extractor(sample)
|
||||
features = scope(extractor)
|
||||
@@ -781,10 +814,20 @@ def pingtaest_extractor():
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def b9f5b_dnfile_extractor():
|
||||
return get_dnfile_extractor(get_data_path_by_name("b9f5b"))
|
||||
def b9f5b_dotnetfile_extractor():
|
||||
return get_dotnetfile_extractor(get_data_path_by_name("b9f5b"))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mixed_mode_64_dnfile_extractor():
|
||||
return get_dnfile_extractor(get_data_path_by_name("mixed-mode-64"))
|
||||
def mixed_mode_64_dotnetfile_extractor():
|
||||
return get_dotnetfile_extractor(get_data_path_by_name("mixed-mode-64"))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def hello_world_dnfile_extractor():
|
||||
return get_dnfile_extractor(get_data_path_by_name("hello-world"))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def _1c444_dnfile_extractor():
|
||||
return get_dnfile_extractor(get_data_path_by_name("1c444..."))
|
||||
|
||||
30
tests/test_dnfile_features.py
Normal file
30
tests/test_dnfile_features.py
Normal file
@@ -0,0 +1,30 @@
|
||||
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import pytest
|
||||
import fixtures
|
||||
from fixtures import *
|
||||
from fixtures import parametrize
|
||||
|
||||
|
||||
@parametrize(
|
||||
"sample,scope,feature,expected",
|
||||
fixtures.FEATURE_PRESENCE_TESTS_DOTNET,
|
||||
indirect=["sample", "scope"],
|
||||
)
|
||||
def test_dnfile_features(sample, scope, feature, expected):
|
||||
fixtures.do_test_feature_presence(fixtures.get_dnfile_extractor, sample, scope, feature, expected)
|
||||
|
||||
|
||||
@parametrize(
|
||||
"sample,scope,feature,expected",
|
||||
fixtures.FEATURE_COUNT_TESTS_DOTNET,
|
||||
indirect=["sample", "scope"],
|
||||
)
|
||||
def test_dnfile_feature_counts(sample, scope, feature, expected):
|
||||
fixtures.do_test_feature_count(fixtures.get_dnfile_extractor, sample, scope, feature, expected)
|
||||
43
tests/test_dotnetfile_features.py
Normal file
43
tests/test_dotnetfile_features.py
Normal file
@@ -0,0 +1,43 @@
|
||||
# Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import pytest
|
||||
import fixtures
|
||||
from fixtures import *
|
||||
from fixtures import parametrize
|
||||
|
||||
|
||||
@parametrize(
|
||||
"sample,scope,feature,expected",
|
||||
fixtures.FEATURE_PRESENCE_TESTS_DOTNET,
|
||||
indirect=["sample", "scope"],
|
||||
)
|
||||
def test_dotnetfile_features(sample, scope, feature, expected):
|
||||
if scope.__name__ != "file":
|
||||
pytest.xfail("dotnetfile only extracts file scope features")
|
||||
|
||||
if isinstance(feature, capa.features.file.FunctionName):
|
||||
pytest.xfail("dotnetfile doesn't extract function names")
|
||||
|
||||
fixtures.do_test_feature_presence(fixtures.get_dotnetfile_extractor, sample, scope, feature, expected)
|
||||
|
||||
|
||||
@parametrize(
|
||||
"extractor,function,expected",
|
||||
[
|
||||
("b9f5b_dotnetfile_extractor", "is_dotnet_file", True),
|
||||
("b9f5b_dotnetfile_extractor", "is_mixed_mode", False),
|
||||
("mixed_mode_64_dotnetfile_extractor", "is_mixed_mode", True),
|
||||
("b9f5b_dotnetfile_extractor", "get_entry_point", 0x6000007),
|
||||
("b9f5b_dotnetfile_extractor", "get_runtime_version", (2, 5)),
|
||||
("b9f5b_dotnetfile_extractor", "get_meta_version_string", "v2.0.50727"),
|
||||
],
|
||||
)
|
||||
def test_dotnetfile_extractor(request, extractor, function, expected):
|
||||
extractor_function = getattr(request.getfixturevalue(extractor), function)
|
||||
assert extractor_function() == expected
|
||||
Reference in New Issue
Block a user