vmray: improve supported file type validation

This commit is contained in:
Mike Hunhoff
2024-07-17 12:37:51 -06:00
parent 100df45cc0
commit 19a6f3ad49
3 changed files with 25 additions and 11 deletions

View File

@@ -7,7 +7,7 @@
# See the License for the specific language governing permissions and limitations under the License.
import json
import logging
from typing import Dict, List, Tuple
from typing import Dict, List, Tuple, Optional
from pathlib import Path
from zipfile import ZipFile
from collections import defaultdict
@@ -54,20 +54,18 @@ class VMRayAnalysis:
self.process_calls: Dict[int, Dict[int, List[FunctionCall]]] = defaultdict(lambda: defaultdict(list))
self.base_address: int
self.sample_file_name: str
self.sample_file_analysis: File
self.sample_file_static_data: StaticData
self.sample_file_name: Optional[str] = None
self.sample_file_analysis: Optional[File] = None
self.sample_file_static_data: Optional[StaticData] = None
self._find_sample_file()
self._compute_base_address()
self._compute_imports()
self._compute_exports()
self._compute_sections()
self._compute_process_ids()
self._compute_process_threads()
self._compute_process_calls()
if self.sample_file_name is None or self.sample_file_analysis is None:
logger.warning("VMRay archive does not contain sample file")
raise UnsupportedFormatError("VMRay archive does not contain sample file")
if not self.sample_file_static_data.pe:
logger.warning("VMRay feature extractor only supports PE at this time")
raise UnsupportedFormatError("VMRay feature extractor only supports PE at this time")
# VMRay does not store static strings for the sample file so we must use the source file
@@ -79,6 +77,15 @@ class VMRayAnalysis:
self.sample_file_buf: bytes = self.zipfile.read(sample_file_path, pwd=DEFAULT_ARCHIVE_PASSWORD)
# only compute these if we've found a supported sample file type
self._compute_base_address()
self._compute_imports()
self._compute_exports()
self._compute_sections()
self._compute_process_ids()
self._compute_process_threads()
self._compute_process_calls()
def _find_sample_file(self):
for file_name, file_analysis in self.sv2.files.items():
if file_analysis.is_sample:
@@ -92,21 +99,25 @@ class VMRayAnalysis:
break
def _compute_base_address(self):
    """Record the sample's PE image base as ``self.base_address``.

    Leaves ``self.base_address`` untouched when the sample's static data
    carries no PE information.
    """
    # the sample file must have been located before this runs
    assert self.sample_file_static_data is not None

    pe = self.sample_file_static_data.pe
    if not pe:
        return

    self.base_address = pe.basic_info.image_base
def _compute_exports(self):
    """Fill ``self.exports`` with an address -> API name entry per PE export.

    Does nothing when the sample's static data carries no PE information.
    """
    # the sample file must have been located before this runs
    assert self.sample_file_static_data is not None

    pe = self.sample_file_static_data.pe
    if not pe:
        return

    for exported in pe.exports:
        self.exports[exported.address] = exported.api.name
def _compute_imports(self):
    """Fill ``self.imports`` with address -> (dll, API name) per PE import.

    Does nothing when the sample's static data carries no PE information.
    """
    # the sample file must have been located before this runs
    assert self.sample_file_static_data is not None

    pe = self.sample_file_static_data.pe
    if not pe:
        return

    for library in pe.imports:
        for imported in library.apis:
            self.imports[imported.address] = (library.dll, imported.api.name)
def _compute_sections(self):
    """Fill ``self.sections`` with a virtual address -> name entry per PE section.

    Does nothing when the sample's static data carries no PE information.
    """
    # the sample file must have been located before this runs
    assert self.sample_file_static_data is not None

    pe = self.sample_file_static_data.pe
    if not pe:
        return

    for sect in pe.sections:
        self.sections[sect.virtual_address] = sect.name

View File

@@ -29,6 +29,8 @@ from capa.features.extractors.base_extractor import (
class VMRayExtractor(DynamicFeatureExtractor):
def __init__(self, analysis: VMRayAnalysis):
assert analysis.sample_file_analysis is not None
super().__init__(
hashes=SampleHashes(
md5=analysis.sample_file_analysis.hash_values.md5.lower(),

View File

@@ -29,6 +29,7 @@ def extract_arch(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
def extract_format(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
assert analysis.sample_file_static_data is not None
if analysis.sample_file_static_data.pe:
yield Format(FORMAT_PE), NO_ADDRESS
else: