add get_sample_hashes() to base extractor

This commit is contained in:
Yacine Elhamer
2023-07-21 08:44:37 +01:00
parent 806bc1853d
commit 24b3abd706
9 changed files with 18 additions and 18 deletions

View File

@@ -40,9 +40,6 @@ class BinjaFeatureExtractor(StaticFeatureExtractor):
def get_base_address(self):
return AbsoluteVirtualAddress(self.bv.start)
def get_sample_hashes(self):
return tuple(self.sample_hashes)
def extract_global_features(self):
yield from self.global_features

View File

@@ -40,9 +40,6 @@ class CapeExtractor(DynamicFeatureExtractor):
# value according to the PE header, the actual trace may use a different imagebase
return AbsoluteVirtualAddress(self.static["pe"]["imagebase"])
def get_sample_hashes(self):
return tuple(self.hashes)
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
yield from self.global_features

View File

@@ -93,9 +93,6 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
def get_base_address(self):
return NO_ADDRESS
def get_sample_hashes(self):
return tuple(self.sample_hashes)
def extract_global_features(self):
yield from self.global_features

View File

@@ -31,7 +31,7 @@ from capa.features.common import (
Characteristic,
)
from capa.features.address import NO_ADDRESS, Address, DNTokenAddress
from capa.features.extractors.base_extractor import StaticFeatureExtractor
from capa.features.extractors.base_extractor import SampleHashes, StaticFeatureExtractor
from capa.features.extractors.dnfile.helpers import (
DnType,
iter_dotnet_table,
@@ -170,6 +170,7 @@ class DotnetFileFeatureExtractor(StaticFeatureExtractor):
super().__init__()
self.path: Path = path
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
self.hashes = SampleHashes.from_bytes(self.path.read_bytes())
def get_base_address(self):
return NO_ADDRESS

View File

@@ -40,9 +40,6 @@ class IdaFeatureExtractor(StaticFeatureExtractor):
def get_base_address(self):
return AbsoluteVirtualAddress(idaapi.get_imagebase())
def get_sample_hashes(self):
return self.sample_hashes
def extract_global_features(self):
yield from self.global_features

View File

@@ -15,6 +15,7 @@ from capa.features.address import NO_ADDRESS, Address, ThreadAddress, ProcessAdd
from capa.features.extractors.base_extractor import (
BBHandle,
InsnHandle,
SampleHashes,
ThreadHandle,
ProcessHandle,
FunctionHandle,
@@ -49,6 +50,7 @@ class NullStaticFeatureExtractor(StaticFeatureExtractor):
"""
base_address: Address
sample_hashes: SampleHashes
global_features: List[Feature]
file_features: List[Tuple[Address, Feature]]
functions: Dict[Address, FunctionFeatures]
@@ -103,6 +105,7 @@ class ProcessFeatures:
@dataclass
class NullDynamicFeatureExtractor(DynamicFeatureExtractor):
base_address: Address
sample_hashes: SampleHashes
global_features: List[Feature]
file_features: List[Tuple[Address, Feature]]
processes: Dict[Address, ProcessFeatures]

View File

@@ -19,7 +19,7 @@ import capa.features.extractors.strings
from capa.features.file import Export, Import, Section
from capa.features.common import OS, ARCH_I386, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, Characteristic
from capa.features.address import NO_ADDRESS, FileOffsetAddress, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import StaticFeatureExtractor
from capa.features.extractors.base_extractor import SampleHashes, StaticFeatureExtractor
logger = logging.getLogger(__name__)
@@ -190,6 +190,7 @@ class PefileFeatureExtractor(StaticFeatureExtractor):
super().__init__()
self.path: Path = path
self.pe = pefile.PE(str(path))
self.hashes = SampleHashes.from_bytes(self.path.read_bytes())
def get_base_address(self):
return AbsoluteVirtualAddress(self.pe.OPTIONAL_HEADER.ImageBase)

View File

@@ -49,9 +49,6 @@ class VivisectFeatureExtractor(StaticFeatureExtractor):
# assume there is only one file loaded into the vw
return AbsoluteVirtualAddress(list(self.vw.filemeta.values())[0]["imagebase"])
def get_sample_hashes(self):
return tuple(self.sample_hashes)
def extract_global_features(self):
yield from self.global_features

View File

@@ -27,7 +27,12 @@ import capa.features.basicblock
import capa.features.extractors.null as null
from capa.helpers import assert_never
from capa.features.freeze.features import Feature, feature_from_capa
from capa.features.extractors.base_extractor import FeatureExtractor, StaticFeatureExtractor, DynamicFeatureExtractor
from capa.features.extractors.base_extractor import (
SampleHashes,
FeatureExtractor,
StaticFeatureExtractor,
DynamicFeatureExtractor,
)
logger = logging.getLogger(__name__)
@@ -300,6 +305,7 @@ class Extractor(BaseModel):
class Freeze(BaseModel):
version: int = 2
base_address: Address = Field(alias="base address")
sample_hashes: SampleHashes
extractor: Extractor
features: Features
@@ -400,6 +406,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str:
freeze = Freeze(
version=2,
base_address=Address.from_capa(extractor.get_base_address()),
sample_hashes=extractor.get_sample_hashes(),
extractor=Extractor(name=extractor.__class__.__name__),
features=features,
) # type: ignore
@@ -484,6 +491,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
freeze = Freeze(
version=2,
base_address=Address.from_capa(base_addr),
sample_hashes=extractor.get_sample_hashes(),
extractor=Extractor(name=extractor.__class__.__name__),
features=features,
) # type: ignore
@@ -501,6 +509,7 @@ def loads_static(s: str) -> StaticFeatureExtractor:
assert isinstance(freeze.features, StaticFeatures)
return null.NullStaticFeatureExtractor(
base_address=freeze.base_address.to_capa(),
sample_hashes=freeze.sample_hashes,
global_features=[f.feature.to_capa() for f in freeze.features.global_],
file_features=[(f.address.to_capa(), f.feature.to_capa()) for f in freeze.features.file],
functions={
@@ -533,6 +542,7 @@ def loads_dynamic(s: str) -> DynamicFeatureExtractor:
assert isinstance(freeze.features, DynamicFeatures)
return null.NullDynamicFeatureExtractor(
base_address=freeze.base_address.to_capa(),
sample_hashes=freeze.sample_hashes,
global_features=[f.feature.to_capa() for f in freeze.features.global_],
file_features=[(f.address.to_capa(), f.feature.to_capa()) for f in freeze.features.file],
processes={