From 8bf0d16fd8008c6a3c28f06279065bc120f11b9e Mon Sep 17 00:00:00 2001 From: Mike Hunhoff Date: Thu, 18 Jul 2024 17:52:33 -0600 Subject: [PATCH] vmray: add init support for ELF files --- capa/features/extractors/vmray/__init__.py | 15 +++++--- capa/features/extractors/vmray/global_.py | 17 ++++++++- capa/features/extractors/vmray/models.py | 40 +++++++++++++++++++++- 3 files changed, 65 insertions(+), 7 deletions(-) diff --git a/capa/features/extractors/vmray/__init__.py b/capa/features/extractors/vmray/__init__.py index 27906d83..141a2595 100644 --- a/capa/features/extractors/vmray/__init__.py +++ b/capa/features/extractors/vmray/__init__.py @@ -69,10 +69,12 @@ class VMRayAnalysis: logger.warning("VMRay archive does not contain static data (file_type: %s)", self.file_type) raise UnsupportedFormatError("VMRay archive does not contain static data (file_type: %s)", self.file_type) - if not self.sample_file_static_data.pe: - logger.warning("VMRay feature extractor only supports PE at this time (file_type: %s)", self.file_type) + if not self.sample_file_static_data.pe and not self.sample_file_static_data.elf: + logger.warning( + "VMRay feature extractor only supports PE and ELF at this time (file_type: %s)", self.file_type + ) raise UnsupportedFormatError( - "VMRay feature extractor only supports PE at this time(file_type: %s)", self.file_type + "VMRay feature extractor only supports PE and ELF at this time(file_type: %s)", self.file_type ) # VMRay does not store static strings for the sample file so we must use the source file @@ -126,8 +128,11 @@ class VMRayAnalysis: def _compute_sections(self): assert self.sample_file_static_data is not None if self.sample_file_static_data.pe: - for section in self.sample_file_static_data.pe.sections: - self.sections[section.virtual_address] = section.name + for pefile_section in self.sample_file_static_data.pe.sections: + self.sections[pefile_section.virtual_address] = pefile_section.name + elif self.sample_file_static_data.elf: + 
for elffile_section in self.sample_file_static_data.elf.sections: + self.sections[elffile_section.header.sh_addr] = elffile_section.header.sh_name def _compute_process_ids(self): for process in self.sv2.processes.values(): diff --git a/capa/features/extractors/vmray/global_.py b/capa/features/extractors/vmray/global_.py index 69f91bf0..82ab2458 100644 --- a/capa/features/extractors/vmray/global_.py +++ b/capa/features/extractors/vmray/global_.py @@ -9,7 +9,18 @@ import logging from typing import Tuple, Iterator -from capa.features.common import OS, ARCH_I386, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, Feature +from capa.features.common import ( + OS, + OS_LINUX, + ARCH_I386, + FORMAT_PE, + ARCH_AMD64, + FORMAT_ELF, + OS_WINDOWS, + Arch, + Format, + Feature, +) from capa.features.address import NO_ADDRESS, Address from capa.features.extractors.vmray import VMRayAnalysis @@ -32,6 +43,8 @@ def extract_format(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]] assert analysis.sample_file_static_data is not None if analysis.sample_file_static_data.pe: yield Format(FORMAT_PE), NO_ADDRESS + elif analysis.sample_file_static_data.elf: + yield Format(FORMAT_ELF), NO_ADDRESS else: logger.warning("unrecognized file format: %s", analysis.sv2.analysis_metadata.sample_type) raise ValueError( @@ -44,6 +57,8 @@ def extract_os(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: if "windows" in sample_type.lower(): yield OS(OS_WINDOWS), NO_ADDRESS + elif "linux" in sample_type.lower(): + yield OS(OS_LINUX), NO_ADDRESS else: logger.warning("unrecognized OS: %s", sample_type) raise ValueError(f"unrecognized OS from the VMRay report: {sample_type}") diff --git a/capa/features/extractors/vmray/models.py b/capa/features/extractors/vmray/models.py index 4291e7d0..9d2bd271 100644 --- a/capa/features/extractors/vmray/models.py +++ b/capa/features/extractors/vmray/models.py @@ -72,6 +72,13 @@ def validate_param_list(value): return [value] +def 
validate_call_name(value): + if value.startswith("sys_"): + return value[4:] + else: + return value + + # convert the input value to a Python int type before inner validation (int) is called HexInt = Annotated[int, BeforeValidator(validate_hex_int)] @@ -98,13 +105,18 @@ class Params(BaseModel): params: ParamList = Field(alias="param") +# call names may contain unneeded data so we remove that data before +# the inner validation (str) is called +CallName = Annotated[str, BeforeValidator(validate_call_name)] + + # models flog.xml files class FunctionCall(BaseModel): # ts: HexInt fncall_id: HexInt process_id: HexInt thread_id: HexInt - name: str + name: CallName # addr: HexInt # from_addr: HexInt = Field(alias="from") params_in: Params = Field(alias="in", default=None) @@ -193,8 +205,34 @@ class PEFile(BaseModel): sections: List[PEFileSection] = [] +class ElfFileSectionHeader(BaseModel): + sh_name: str + sh_addr: int + + +class ElfFileSection(BaseModel): + header: ElfFileSectionHeader + + +""" +class ElfFileHeader(BaseModel): + file_class: str + endianness: str + file_type: str + architecture: str + architecture_human_str: str + entry_point: int +""" + + +class ElfFile(BaseModel): + # file_header: ElfFileHeader + sections: List[ElfFileSection] + + class StaticData(BaseModel): pe: Optional[PEFile] = None + elf: Optional[ElfFile] = None class FileHashes(BaseModel):