vmray: add example models for summary_v2.json

This commit is contained in:
Mike Hunhoff
2024-06-13 12:54:59 -06:00
parent a9dafe283c
commit a797405648
2 changed files with 128 additions and 1 deletions

View File

@@ -3,7 +3,8 @@ from pathlib import Path
import pydantic_xml
from capa.features.extractors.vmray.models import Analysis
import capa.helpers
from capa.features.extractors.vmray.models import Analysis, SummaryV2
from capa.features.extractors.base_extractor import SampleHashes, DynamicFeatureExtractor
# TODO also/or look into xmltodict?
@@ -20,9 +21,43 @@ class VMRayExtractor(DynamicFeatureExtractor):
print(vr)
@classmethod
def from_summary(cls, sv2_path: Path):
    """Load a VMRay summary (summary_v2.json) file and print PE details for each sample.

    Every entry in the summary's ``files`` that is flagged as a sample and
    references static data is looked up in ``static_data``; when PE data is
    present, its basic info, exports, and imports are printed.

    Args:
        sv2_path: path of the JSON summary file to load.
    """
    sv2_json = capa.helpers.load_json_from_path(sv2_path)
    sv2 = SummaryV2.model_validate(sv2_json)

    # only the values are used; the dictionary keys are not needed here
    for file in sv2.files.values():
        if not file.is_sample:
            continue

        if not file.ref_static_data:
            continue

        # the second component of the reference path is the key into static_data
        static_data = sv2.static_data.get(file.ref_static_data.path[1])

        # .get() returns None for an unknown key and `pe` is optional on the
        # model, so both are checked before any PE attribute is dereferenced
        if static_data is None or not static_data.pe:
            continue

        pe = static_data.pe

        # basic_info is optional on the model as well
        if pe.basic_info:
            print(f"file_type: {pe.basic_info.file_type}")
            print(f"image_base: {hex(pe.basic_info.image_base)}")
            print(f"machine_type: {pe.basic_info.machine_type}")

        if pe.exports:
            print("exports")
            for export in pe.exports:
                print(f"\tname: {export.api.name}, address: {hex(export.address)}")

        if pe.imports:
            print("imports")
            for import_ in pe.imports:
                print(f"\tdll: {import_.dll} ({len(import_.apis)})")
if __name__ == "__main__":
    import sys

    # the first command-line argument is the path handed to the extractor
    VMRayExtractor.from_report(Path(sys.argv[1]))
    # VMRayExtractor.from_summary(Path(sys.argv[1]))

View File

@@ -7,10 +7,15 @@
# See the License for the specific language governing permissions and limitations under the License.
from typing import Any, Dict, List, Union, Literal, Optional
from pydantic import Field, BaseModel
# TODO install/force lxml?
from pydantic_xml import BaseXmlModel, attr, element
### models for flog.xml
class FunctionCall(BaseXmlModel, tag="fncall"):
# ts: str = attr()
# fncall_id: int = attr()
@@ -41,3 +46,90 @@ class Analysis(BaseXmlModel, tag="analysis"):
threads: List[MonitorThread] = element(tag="monitor_thread")
# failing so far...
# fncall: List[FunctionCall] = element(tag="fncall")
### models for summary_v2.json files
class GenericReference(BaseModel):
    """Reference object made of a list of path components and a source string."""

    # pydantic treats underscore-prefixed annotations as private attributes and
    # never fills them from input, so the "_type" key is exposed via an alias.
    # Optional, so input that validated before this field existed still does.
    type_: Optional[str] = Field(default=None, alias="_type")
    path: List[str]
    source: str
class StaticDataReference(GenericReference):
    """Reference used for static data; declares no fields beyond GenericReference."""
class PEFileBasicInfo(BaseModel):
    """Basic header-level information recorded for a PE file."""

    # pydantic treats underscore-prefixed annotations as private attributes and
    # never fills them from input, so the "_type" key is exposed via an alias.
    # Optional, so input that validated before this field existed still does.
    type_: Optional[str] = Field(default=None, alias="_type")
    compile_time: str
    file_type: str
    image_base: int
    machine_type: str
    size_of_code: int
    size_of_initialized_data: int
    size_of_uninitialized_data: int
    subsystem: str
    entry_point: int
    # the only value in this model that may be absent from the input
    imphash: Optional[str] = None
class API(BaseModel):
    """Named API entry with an optional ordinal."""

    # pydantic treats underscore-prefixed annotations as private attributes and
    # never fills them from input, so the "_type" key is exposed via an alias.
    # Optional, so input that validated before this field existed still does.
    type_: Optional[str] = Field(default=None, alias="_type")
    name: str
    ordinal: Optional[int] = None
class PEFileExport(BaseModel):
    """Single PE export: an address paired with the API it exposes."""

    # pydantic treats underscore-prefixed annotations as private attributes and
    # never fills them from input, so the "_type" key is exposed via an alias.
    # Optional, so input that validated before this field existed still does.
    type_: Optional[str] = Field(default=None, alias="_type")
    address: int
    api: API
class PEFileImport(BaseModel):
    """Single imported API of a PE file together with its thunk information."""

    # pydantic treats underscore-prefixed annotations as private attributes and
    # never fills them from input, so the "_type" key is exposed via an alias.
    # Optional, so input that validated before this field existed still does.
    type_: Optional[str] = Field(default=None, alias="_type")
    address: int
    api: API
    thunk_offset: int
    # the hint may be absent from the input
    hint: Optional[int] = None
    thunk_rva: int
class PEFileImportModule(BaseModel):
    """Imported DLL and the list of APIs imported from it."""

    # pydantic treats underscore-prefixed annotations as private attributes and
    # never fills them from input, so the "_type" key is exposed via an alias.
    # Optional, so input that validated before this field existed still does.
    type_: Optional[str] = Field(default=None, alias="_type")
    dll: str
    apis: List[PEFileImport]
class PEFile(BaseModel):
    """PE-specific static data: basic info, exports, and imports, each optional."""

    # pydantic treats underscore-prefixed annotations as private attributes and
    # never fills them from input, so the "_type" key is exposed via an alias.
    # Optional, so input that validated before this field existed still does.
    type_: Optional[str] = Field(default=None, alias="_type")
    # every section may be missing, so consumers must check before use
    basic_info: Optional[PEFileBasicInfo] = None
    exports: Optional[List[PEFileExport]] = None
    imports: Optional[List[PEFileImportModule]] = None
class StaticData(BaseModel):
    # Container for the static data of one file; only PE data is modeled here.
    # `pe` defaults to None when the input carries no PE section, so callers
    # must check it before dereferencing.
    pe: Optional[PEFile] = None
class File(BaseModel):
    """File entry of the summary, with its hashes, flags, and optional references."""

    # pydantic treats underscore-prefixed annotations as private attributes and
    # never fills them from input, so the "_type" key is exposed via an alias.
    # Optional, so input that validated before this field existed still does.
    type_: Optional[str] = Field(default=None, alias="_type")
    categories: List[str]
    hash_values: Dict[str, str]
    is_artifact: bool
    is_ioc: bool
    is_sample: bool
    size: int
    is_truncated: bool
    # the remaining attributes other than `verdict` may be absent from the input
    mime_type: Optional[str] = None
    operations: Optional[List[str]] = None
    ref_filenames: Optional[List[GenericReference]] = None
    ref_gfncalls: Optional[List[GenericReference]] = None
    ref_static_data: Optional[StaticDataReference] = None
    ref_vti_matches: Optional[List[GenericReference]] = None
    # required even though it is declared after the optional fields
    verdict: str
class SummaryV2(BaseModel):
    # Top-level model for a summary_v2.json document; only the two sections
    # declared below are modeled.
    # file entries, keyed by string
    files: Dict[str, File]
    # static data entries, keyed by string
    static_data: Dict[str, StaticData]