vmray: add init support for ELF files

This commit is contained in:
Mike Hunhoff
2024-07-18 17:52:33 -06:00
parent 24a31a8bc3
commit 8bf0d16fd8
3 changed files with 65 additions and 7 deletions

View File

@@ -69,10 +69,12 @@ class VMRayAnalysis:
logger.warning("VMRay archive does not contain static data (file_type: %s)", self.file_type)
raise UnsupportedFormatError("VMRay archive does not contain static data (file_type: %s)", self.file_type)
if not self.sample_file_static_data.pe:
logger.warning("VMRay feature extractor only supports PE at this time (file_type: %s)", self.file_type)
if not self.sample_file_static_data.pe and not self.sample_file_static_data.elf:
logger.warning(
"VMRay feature extractor only supports PE and ELF at this time (file_type: %s)", self.file_type
)
raise UnsupportedFormatError(
"VMRay feature extractor only supports PE at this time(file_type: %s)", self.file_type
"VMRay feature extractor only supports PE and ELF at this time(file_type: %s)", self.file_type
)
# VMRay does not store static strings for the sample file so we must use the source file
@@ -126,8 +128,11 @@ class VMRayAnalysis:
def _compute_sections(self):
    """Populate ``self.sections`` with an address -> name entry per section.

    PE samples are keyed by each section's ``virtual_address`` and store its
    ``name``; ELF samples are keyed by ``header.sh_addr`` and store
    ``header.sh_name``. When the static data carries neither PE nor ELF
    information, ``self.sections`` is left untouched.
    """
    assert self.sample_file_static_data is not None

    static_data = self.sample_file_static_data
    if static_data.pe:
        # a single pass over the PE sections is enough; walking them twice
        # only rewrites the same address -> name entries
        for pefile_section in static_data.pe.sections:
            self.sections[pefile_section.virtual_address] = pefile_section.name
    elif static_data.elf:
        for elffile_section in static_data.elf.sections:
            self.sections[elffile_section.header.sh_addr] = elffile_section.header.sh_name
def _compute_process_ids(self):
for process in self.sv2.processes.values():

View File

@@ -9,7 +9,18 @@
import logging
from typing import Tuple, Iterator
from capa.features.common import OS, ARCH_I386, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, Feature
from capa.features.common import (
OS,
OS_LINUX,
ARCH_I386,
FORMAT_PE,
ARCH_AMD64,
FORMAT_ELF,
OS_WINDOWS,
Arch,
Format,
Feature,
)
from capa.features.address import NO_ADDRESS, Address
from capa.features.extractors.vmray import VMRayAnalysis
@@ -32,6 +43,8 @@ def extract_format(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]
assert analysis.sample_file_static_data is not None
if analysis.sample_file_static_data.pe:
yield Format(FORMAT_PE), NO_ADDRESS
elif analysis.sample_file_static_data.elf:
yield Format(FORMAT_ELF), NO_ADDRESS
else:
logger.warning("unrecognized file format: %s", analysis.sv2.analysis_metadata.sample_type)
raise ValueError(
@@ -44,6 +57,8 @@ def extract_os(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
if "windows" in sample_type.lower():
yield OS(OS_WINDOWS), NO_ADDRESS
elif "linux" in sample_type.lower():
yield OS(OS_LINUX), NO_ADDRESS
else:
logger.warning("unrecognized OS: %s", sample_type)
raise ValueError(f"unrecognized OS from the VMRay report: {sample_type}")

View File

@@ -72,6 +72,13 @@ def validate_param_list(value):
return [value]
def validate_call_name(value):
    """Strip a leading "sys_" from a call name.

    Used as a "before" validator, so it runs ahead of the inner ``str``
    validation and may be handed a value that is not a string.

    Args:
        value: the raw call name.

    Returns:
        ``value`` without its leading "sys_" when it is a string starting
        with that prefix; otherwise ``value`` unchanged.
    """
    # leave non-string input alone so the inner (str) validation reports it,
    # rather than failing here with an AttributeError on .startswith
    if not isinstance(value, str):
        return value

    prefix = "sys_"
    if value.startswith(prefix):
        return value[len(prefix):]
    return value
# convert the input value to a Python int type before inner validation (int) is called
HexInt = Annotated[int, BeforeValidator(validate_hex_int)]
@@ -98,13 +105,18 @@ class Params(BaseModel):
params: ParamList = Field(alias="param")
# call names may contain unneeded data so we remove that data before
# the inner validation (str) is called
CallName = Annotated[str, BeforeValidator(validate_call_name)]
# models flog.xml files
class FunctionCall(BaseModel):
# ts: HexInt
fncall_id: HexInt
process_id: HexInt
thread_id: HexInt
name: str
name: CallName
# addr: HexInt
# from_addr: HexInt = Field(alias="from")
params_in: Params = Field(alias="in", default=None)
@@ -193,8 +205,34 @@ class PEFile(BaseModel):
sections: List[PEFileSection] = []
# models the header of a single ELF section
class ElfFileSectionHeader(BaseModel):
    # section name, as a string
    sh_name: str
    # section address, as an integer
    # NOTE(review): presumably the section's virtual address - confirm against the VMRay schema
    sh_addr: int
# models a single ELF section; only its header is captured
class ElfFileSection(BaseModel):
    # the section's header (name and address)
    header: ElfFileSectionHeader
"""
class ElfFileHeader(BaseModel):
file_class: str
endianness: str
file_type: str
architecture: str
architecture_human_str: str
entry_point: int
"""
# models the ELF static data of a sample; only the sections are captured
class ElfFile(BaseModel):
    # file_header: ElfFileHeader
    # default to no sections, like PEFile, so that input without a section
    # list still validates instead of failing the whole model
    sections: List[ElfFileSection] = []
# models static data for a file; each format-specific field defaults to None
class StaticData(BaseModel):
    # PE static data, or None when not provided
    pe: Optional[PEFile] = None
    # ELF static data, or None when not provided
    elf: Optional[ElfFile] = None
class FileHashes(BaseModel):