mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 15:49:46 -08:00
extractor: introduce standardized handles for function, bb, insn
This commit is contained in:
@@ -7,23 +7,59 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import abc
|
||||
from typing import Tuple, Iterator, SupportsInt
|
||||
import dataclasses
|
||||
from typing import Any, Dict, Tuple, Iterator
|
||||
from dataclasses import dataclass
|
||||
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
|
||||
# feature extractors may reference functions, BBs, insns by opaque handle values.
|
||||
# the only requirement of these handles are that they support `__int__`,
|
||||
# so that they can be rendered as addresses.
|
||||
# you can use the `.address` property to get and render the address of the feature.
|
||||
#
|
||||
# these handles are only consumed by routines on
|
||||
# the feature extractor from which they were created.
|
||||
#
|
||||
# int(FunctionHandle) -> function start address
|
||||
# int(BBHandle) -> BasicBlock start address
|
||||
# int(InsnHandle) -> instruction address
|
||||
FunctionHandle = SupportsInt
|
||||
BBHandle = SupportsInt
|
||||
InsnHandle = SupportsInt
|
||||
|
||||
|
||||
@dataclass
|
||||
class FunctionHandle:
|
||||
"""reference to a function recognized by a feature extractor.
|
||||
|
||||
Attributes:
|
||||
address: the address of the function.
|
||||
inner: extractor-specific data.
|
||||
ctx: a context object for the extractor.
|
||||
"""
|
||||
|
||||
address: Address
|
||||
inner: Any
|
||||
ctx: Dict[str, Any] = dataclasses.field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
|
||||
class BBHandle:
|
||||
"""reference to a basic block recognized by a feature extractor.
|
||||
|
||||
Attributes:
|
||||
address: the address of the basic block start address.
|
||||
inner: extractor-specific data.
|
||||
"""
|
||||
|
||||
address: Address
|
||||
inner: Any
|
||||
|
||||
|
||||
@dataclass
|
||||
class InsnHandle:
|
||||
"""reference to a instruction recognized by a feature extractor.
|
||||
|
||||
Attributes:
|
||||
address: the address of the instruction address.
|
||||
inner: extractor-specific data.
|
||||
"""
|
||||
|
||||
address: Address
|
||||
inner: Any
|
||||
|
||||
|
||||
class FeatureExtractor:
|
||||
@@ -53,14 +89,14 @@ class FeatureExtractor:
|
||||
super(FeatureExtractor, self).__init__()
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_base_address(self) -> int:
|
||||
def get_base_address(self) -> AbsoluteVirtualAddress:
|
||||
"""
|
||||
fetch the preferred load address at which the sample was analyzed.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_global_features(self) -> Iterator[Tuple[Feature, int]]:
|
||||
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
extract features found at every scope ("global").
|
||||
|
||||
@@ -71,12 +107,12 @@ class FeatureExtractor:
|
||||
print('0x%x: %s', va, feature)
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: feature and its location
|
||||
Tuple[Feature, Address]: feature and its location
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_file_features(self) -> Iterator[Tuple[Feature, int]]:
|
||||
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
extract file-scope features.
|
||||
|
||||
@@ -87,7 +123,7 @@ class FeatureExtractor:
|
||||
print('0x%x: %s', va, feature)
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: feature and its location
|
||||
Tuple[Feature, Address]: feature and its location
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@@ -99,32 +135,33 @@ class FeatureExtractor:
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def is_library_function(self, va: int) -> bool:
|
||||
def is_library_function(self, addr: Address) -> bool:
|
||||
"""
|
||||
is the given address a library function?
|
||||
the backend may implement its own function matching algorithm, or none at all.
|
||||
we accept a VA here, rather than function object, to handle addresses identified in instructions.
|
||||
we accept an address here, rather than function object,
|
||||
to handle addresses identified in instructions.
|
||||
|
||||
this information is used to:
|
||||
- filter out matches in library functions (by default), and
|
||||
- recognize when to fetch symbol names for called (non-API) functions
|
||||
|
||||
args:
|
||||
va (int): the virtual address of a function.
|
||||
addr (Address): the address of a function.
|
||||
|
||||
returns:
|
||||
bool: True if the given address is the start of a library function.
|
||||
"""
|
||||
return False
|
||||
|
||||
def get_function_name(self, va: int) -> str:
|
||||
def get_function_name(self, addr: Address) -> str:
|
||||
"""
|
||||
fetch any recognized name for the given address.
|
||||
this is only guaranteed to return a value when the given function is a recognized library function.
|
||||
we accept a VA here, rather than function object, to handle addresses identified in instructions.
|
||||
|
||||
args:
|
||||
va (int): the virtual address of a function.
|
||||
addr (Address): the address of a function.
|
||||
|
||||
returns:
|
||||
str: the function name
|
||||
@@ -132,10 +169,10 @@ class FeatureExtractor:
|
||||
raises:
|
||||
KeyError: when the given function does not have a name.
|
||||
"""
|
||||
raise KeyError(va)
|
||||
raise KeyError(addr)
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_function_features(self, f: FunctionHandle) -> Iterator[Tuple[Feature, int]]:
|
||||
def extract_function_features(self, f: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
extract function-scope features.
|
||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||
@@ -144,14 +181,14 @@ class FeatureExtractor:
|
||||
|
||||
extractor = VivisectFeatureExtractor(vw, path)
|
||||
for function in extractor.get_functions():
|
||||
for feature, va in extractor.extract_function_features(function):
|
||||
print('0x%x: %s', va, feature)
|
||||
for feature, address in extractor.extract_function_features(function):
|
||||
print('0x%x: %s', address, feature)
|
||||
|
||||
args:
|
||||
f [FunctionHandle]: an opaque value previously fetched from `.get_functions()`.
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: feature and its location
|
||||
Tuple[Feature, Address]: feature and its location
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@@ -164,7 +201,7 @@ class FeatureExtractor:
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, int]]:
|
||||
def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
extract basic block-scope features.
|
||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||
@@ -174,15 +211,15 @@ class FeatureExtractor:
|
||||
extractor = VivisectFeatureExtractor(vw, path)
|
||||
for function in extractor.get_functions():
|
||||
for bb in extractor.get_basic_blocks(function):
|
||||
for feature, va in extractor.extract_basic_block_features(function, bb):
|
||||
print('0x%x: %s', va, feature)
|
||||
for feature, address in extractor.extract_basic_block_features(function, bb):
|
||||
print('0x%x: %s', address, feature)
|
||||
|
||||
args:
|
||||
f [FunctionHandle]: an opaque value previously fetched from `.get_functions()`.
|
||||
bb [BBHandle]: an opaque value previously fetched from `.get_basic_blocks()`.
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: feature and its location
|
||||
Tuple[Feature, Address]: feature and its location
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@@ -195,7 +232,9 @@ class FeatureExtractor:
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_insn_features(self, f: FunctionHandle, bb: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, int]]:
|
||||
def extract_insn_features(
|
||||
self, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
"""
|
||||
extract instruction-scope features.
|
||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||
@@ -206,8 +245,8 @@ class FeatureExtractor:
|
||||
for function in extractor.get_functions():
|
||||
for bb in extractor.get_basic_blocks(function):
|
||||
for insn in extractor.get_instructions(function, bb):
|
||||
for feature, va in extractor.extract_insn_features(function, bb, insn):
|
||||
print('0x%x: %s', va, feature)
|
||||
for feature, address in extractor.extract_insn_features(function, bb, insn):
|
||||
print('0x%x: %s', address, feature)
|
||||
|
||||
args:
|
||||
f [FunctionHandle]: an opaque value previously fetched from `.get_functions()`.
|
||||
@@ -215,7 +254,7 @@ class FeatureExtractor:
|
||||
insn [InsnHandle]: an opaque value previously fetched from `.get_instructions()`.
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: feature and its location
|
||||
Tuple[Feature, Address]: feature and its location
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user