mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 15:49:46 -08:00
Merge pull request #1762 from yelhamer/modify-sample-hashes
Modify sample hashes
This commit is contained in:
@@ -106,13 +106,14 @@ class StaticFeatureExtractor:
|
||||
|
||||
__metaclass__ = abc.ABCMeta
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, hashes: SampleHashes):
|
||||
#
|
||||
# note: a subclass should define ctor parameters for its own use.
|
||||
# for example, the Vivisect feature extract might require the vw and/or path.
|
||||
# this base class doesn't know what to do with that info, though.
|
||||
#
|
||||
super().__init__()
|
||||
self._sample_hashes = hashes
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_base_address(self) -> Union[AbsoluteVirtualAddress, capa.features.address._NoAddress]:
|
||||
@@ -130,7 +131,7 @@ class StaticFeatureExtractor:
|
||||
"""
|
||||
fetch the hashes for the sample contained within the extractor.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
return self._sample_hashes
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||
@@ -353,20 +354,21 @@ class DynamicFeatureExtractor:
|
||||
|
||||
__metaclass__ = abc.ABCMeta
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, hashes: SampleHashes):
|
||||
#
|
||||
# note: a subclass should define ctor parameters for its own use.
|
||||
# for example, the Vivisect feature extract might require the vw and/or path.
|
||||
# this base class doesn't know what to do with that info, though.
|
||||
#
|
||||
super().__init__()
|
||||
self._sample_hashes = hashes
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_sample_hashes(self) -> SampleHashes:
|
||||
"""
|
||||
fetch the hashes for the sample contained within the extractor.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
return self._sample_hashes
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||
|
||||
@@ -29,20 +29,16 @@ from capa.features.extractors.base_extractor import (
|
||||
|
||||
class BinjaFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self, bv: binja.BinaryView):
|
||||
super().__init__()
|
||||
super().__init__(hashes=SampleHashes.from_bytes(Path(bv.file.original_filename).read_bytes()))
|
||||
self.bv = bv
|
||||
self.global_features: List[Tuple[Feature, Address]] = []
|
||||
self.global_features.extend(capa.features.extractors.binja.file.extract_file_format(self.bv))
|
||||
self.global_features.extend(capa.features.extractors.binja.global_.extract_os(self.bv))
|
||||
self.global_features.extend(capa.features.extractors.binja.global_.extract_arch(self.bv))
|
||||
self.sample_hashes = SampleHashes.from_bytes(Path(bv.file.original_filename).read_bytes())
|
||||
|
||||
def get_base_address(self):
|
||||
return AbsoluteVirtualAddress(self.bv.start)
|
||||
|
||||
def get_sample_hashes(self) -> SampleHashes:
|
||||
return self.sample_hashes
|
||||
|
||||
def extract_global_features(self):
|
||||
yield from self.global_features
|
||||
|
||||
|
||||
@@ -33,15 +33,14 @@ TESTED_VERSIONS = {"2.2-CAPE", "2.4-CAPE"}
|
||||
|
||||
class CapeExtractor(DynamicFeatureExtractor):
|
||||
def __init__(self, report: CapeReport):
|
||||
super().__init__()
|
||||
self.report: CapeReport = report
|
||||
|
||||
self.sample_hashes = SampleHashes(
|
||||
md5=self.report.target.file.md5.lower(),
|
||||
sha1=self.report.target.file.sha1.lower(),
|
||||
sha256=self.report.target.file.sha256.lower(),
|
||||
super().__init__(
|
||||
hashes=SampleHashes(
|
||||
md5=report.target.file.md5.lower(),
|
||||
sha1=report.target.file.sha1.lower(),
|
||||
sha256=report.target.file.sha256.lower(),
|
||||
)
|
||||
)
|
||||
|
||||
self.report: CapeReport = report
|
||||
self.global_features = capa.features.extractors.cape.global_.extract_features(self.report)
|
||||
|
||||
def get_base_address(self) -> Union[AbsoluteVirtualAddress, _NoAddress, None]:
|
||||
@@ -49,9 +48,6 @@ class CapeExtractor(DynamicFeatureExtractor):
|
||||
assert self.report.static is not None and self.report.static.pe is not None
|
||||
return AbsoluteVirtualAddress(self.report.static.pe.imagebase)
|
||||
|
||||
def get_sample_hashes(self) -> SampleHashes:
|
||||
return self.sample_hashes
|
||||
|
||||
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
||||
yield from self.global_features
|
||||
|
||||
|
||||
@@ -76,9 +76,8 @@ class DnFileFeatureExtractorCache:
|
||||
|
||||
class DnfileFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self, path: Path):
|
||||
super().__init__()
|
||||
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
|
||||
self.sample_hashes = SampleHashes.from_bytes(path.read_bytes())
|
||||
super().__init__(hashes=SampleHashes.from_bytes(path.read_bytes()))
|
||||
|
||||
# pre-compute .NET token lookup tables; each .NET method has access to this cache for feature extraction
|
||||
# most relevant at instruction scope
|
||||
@@ -93,9 +92,6 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
|
||||
def get_base_address(self):
|
||||
return NO_ADDRESS
|
||||
|
||||
def get_sample_hashes(self) -> SampleHashes:
|
||||
return self.sample_hashes
|
||||
|
||||
def extract_global_features(self):
|
||||
yield from self.global_features
|
||||
|
||||
|
||||
@@ -83,17 +83,13 @@ GLOBAL_HANDLERS = (
|
||||
|
||||
class DnfileFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self, path: Path):
|
||||
super().__init__()
|
||||
super().__init__(hashes=SampleHashes.from_bytes(path.read_bytes()))
|
||||
self.path: Path = path
|
||||
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
|
||||
self.sample_hashes = SampleHashes.from_bytes(self.path.read_bytes())
|
||||
|
||||
def get_base_address(self) -> AbsoluteVirtualAddress:
|
||||
return AbsoluteVirtualAddress(0x0)
|
||||
|
||||
def get_sample_hashes(self) -> SampleHashes:
|
||||
return self.sample_hashes
|
||||
|
||||
def get_entry_point(self) -> int:
|
||||
# self.pe.net.Flags.CLT_NATIVE_ENTRYPOINT
|
||||
# True: native EP: Token
|
||||
|
||||
@@ -167,17 +167,13 @@ GLOBAL_HANDLERS = (
|
||||
|
||||
class DotnetFileFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self, path: Path):
|
||||
super().__init__()
|
||||
super().__init__(hashes=SampleHashes.from_bytes(path.read_bytes()))
|
||||
self.path: Path = path
|
||||
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))
|
||||
self.sample_hashes = SampleHashes.from_bytes(self.path.read_bytes())
|
||||
|
||||
def get_base_address(self):
|
||||
return NO_ADDRESS
|
||||
|
||||
def get_sample_hashes(self) -> SampleHashes:
|
||||
return self.sample_hashes
|
||||
|
||||
def get_entry_point(self) -> int:
|
||||
# self.pe.net.Flags.CLT_NATIVE_ENTRYPOINT
|
||||
# True: native EP: Token
|
||||
|
||||
@@ -30,21 +30,19 @@ from capa.features.extractors.base_extractor import (
|
||||
|
||||
class IdaFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
super().__init__(
|
||||
hashes=SampleHashes(
|
||||
md5=ida_nalt.retrieve_input_file_md5(), sha1="(unknown)", sha256=ida_nalt.retrieve_input_file_sha256()
|
||||
)
|
||||
)
|
||||
self.global_features: List[Tuple[Feature, Address]] = []
|
||||
self.global_features.extend(capa.features.extractors.ida.file.extract_file_format())
|
||||
self.global_features.extend(capa.features.extractors.ida.global_.extract_os())
|
||||
self.global_features.extend(capa.features.extractors.ida.global_.extract_arch())
|
||||
self.sample_hashes = SampleHashes(
|
||||
md5=ida_nalt.retrieve_input_file_md5(), sha1="(unknown)", sha256=ida_nalt.retrieve_input_file_sha256()
|
||||
)
|
||||
|
||||
def get_base_address(self):
|
||||
return AbsoluteVirtualAddress(idaapi.get_imagebase())
|
||||
|
||||
def get_sample_hashes(self) -> SampleHashes:
|
||||
return self.sample_hashes
|
||||
|
||||
def extract_global_features(self):
|
||||
yield from self.global_features
|
||||
|
||||
|
||||
@@ -63,9 +63,6 @@ class NullStaticFeatureExtractor(StaticFeatureExtractor):
|
||||
for feature in self.global_features:
|
||||
yield feature, NO_ADDRESS
|
||||
|
||||
def get_sample_hashes(self) -> SampleHashes:
|
||||
return self.sample_hashes
|
||||
|
||||
def extract_file_features(self):
|
||||
for address, feature in self.file_features:
|
||||
yield feature, address
|
||||
@@ -124,9 +121,6 @@ class NullDynamicFeatureExtractor(DynamicFeatureExtractor):
|
||||
for feature in self.global_features:
|
||||
yield feature, NO_ADDRESS
|
||||
|
||||
def get_sample_hashes(self) -> SampleHashes:
|
||||
return self.sample_hashes
|
||||
|
||||
def extract_file_features(self):
|
||||
for address, feature in self.file_features:
|
||||
yield feature, address
|
||||
|
||||
@@ -187,17 +187,13 @@ GLOBAL_HANDLERS = (
|
||||
|
||||
class PefileFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self, path: Path):
|
||||
super().__init__()
|
||||
super().__init__(hashes=SampleHashes.from_bytes(path.read_bytes()))
|
||||
self.path: Path = path
|
||||
self.pe = pefile.PE(str(path))
|
||||
self.sample_hashes = SampleHashes.from_bytes(self.path.read_bytes())
|
||||
|
||||
def get_base_address(self):
|
||||
return AbsoluteVirtualAddress(self.pe.OPTIONAL_HEADER.ImageBase)
|
||||
|
||||
def get_sample_hashes(self) -> SampleHashes:
|
||||
return self.sample_hashes
|
||||
|
||||
def extract_global_features(self):
|
||||
buf = Path(self.path).read_bytes()
|
||||
|
||||
|
||||
@@ -33,11 +33,10 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
class VivisectFeatureExtractor(StaticFeatureExtractor):
|
||||
def __init__(self, vw, path: Path, os):
|
||||
super().__init__()
|
||||
self.vw = vw
|
||||
self.path = path
|
||||
self.buf = path.read_bytes()
|
||||
self.sample_hashes = SampleHashes.from_bytes(self.buf)
|
||||
super().__init__(hashes=SampleHashes.from_bytes(self.buf))
|
||||
|
||||
# pre-compute these because we'll yield them at *every* scope.
|
||||
self.global_features: List[Tuple[Feature, Address]] = []
|
||||
@@ -49,9 +48,6 @@ class VivisectFeatureExtractor(StaticFeatureExtractor):
|
||||
# assume there is only one file loaded into the vw
|
||||
return AbsoluteVirtualAddress(list(self.vw.filemeta.values())[0]["imagebase"])
|
||||
|
||||
def get_sample_hashes(self) -> SampleHashes:
|
||||
return self.sample_hashes
|
||||
|
||||
def extract_global_features(self):
|
||||
yield from self.global_features
|
||||
|
||||
|
||||
Reference in New Issue
Block a user