Add VMRay analysis model and call parser

This commit is contained in:
r-sm2024
2024-06-18 21:29:57 +00:00
parent be274d1d65
commit 2b70086467
2 changed files with 124 additions and 0 deletions

View File

@@ -0,0 +1,57 @@
import logging
from typing import Tuple, Iterator
from capa.helpers import assert_never
from capa.features.insn import API, Number
from capa.features.common import String, Feature
from capa.features.address import Address
from capa.features.extractors.vmray.models import FunctionCall, Analysis
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle
logger = logging.getLogger(__name__)
def _extract_value_features(value, address: Address) -> Iterator[Tuple[Feature, Address]]:
    """Yield a String feature for a str value or a Number feature for an int value, paired with `address`."""
    if isinstance(value, str):
        yield String(value), address
    elif isinstance(value, int):
        yield Number(value), address
    else:
        # any other value type is unexpected for a call parameter
        assert_never(value)


def extract_function_calls(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
    """
    this method extracts the given call's features (such as API name and arguments),
    and returns them as API, Number, and String features.

    args:
      ph: handle of the process that issued the call (not read by this function)
      th: handle of the thread that issued the call (not read by this function)
      ch: handle of the call; `ch.inner` is read as the FunctionCall model of the XML fncall element

    yields: Feature, address; where Feature is either: API, Number, or String.
    """
    call = ch.inner

    # NOTE(review): the FunctionCall model declares `address` as a str, while the
    # return annotation promises an Address - confirm which one callers expect.
    address = call.address

    # Extract API name
    yield API(call.name), address

    # Extract arguments from <in>
    for param in call.in_:
        yield from _extract_value_features(param.value, address)

    # Extract return value from <out>
    if call.out is not None:
        yield from _extract_value_features(call.out.value, address)
def extract_features(analysis: Analysis) -> Iterator[Tuple[Feature, Address]]:
    """
    Walk every function call stored in the Analysis object (see models.py)
    and yield the features extracted from each of them, in order.
    """
    # NOTE(review): extract_function_calls is declared with three parameters
    # (ph, th, ch) and reads `ch.inner`, but it is invoked here with a single
    # FunctionCall. As written this call raises TypeError on the first fncall;
    # the caller needs to build the process/thread/call handles - TODO confirm
    # the intended handle construction.
    for call in analysis.fncalls:
        for feature_and_address in extract_function_calls(call):
            yield feature_and_address

View File

@@ -0,0 +1,67 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
from typing import Any, Dict, List, Union, Literal, Optional
from pydantic_xml import BaseXmlModel, attr, element
#
class Param(BaseXmlModel):
    """Parameter record used for the `in` / `out` values of a function call.

    All three fields are populated from XML attributes of the element.
    """

    name: str = attr()
    type: str = attr()
    value: str = attr()
class FunctionCall(BaseXmlModel, tag="fncall"):
    """Model of one `fncall` XML element.

    Scalar fields are read from the element's attributes; `in_` and `out`
    are read from the child elements `<in>` and `<out>`.
    """

    ts: int = attr()
    fncall_id: int = attr()
    process_id: int = attr()
    name: str = attr()  # API call name?
    address: str = attr()  # address
    # NOTE(review): this binds to an XML attribute literally named `from_`;
    # if the log uses `from`, this needs attr(name="from") - TODO confirm.
    from_: str = attr()

    # pydantic-xml selects a child element through `tag=` (as the Analysis model
    # does); `in_` carries a trailing underscore only because `in` is a keyword,
    # so the tag has to be given explicitly.
    in_: List[Param] = element(tag="in")
    # <out> may be absent: default to None so the field is genuinely optional.
    out: Optional[Param] = element(tag="out", default=None)
class FunctionReturn(BaseXmlModel, tag="fnret"):
    """Model of one `fnret` XML element; every field is read from an XML attribute."""

    ts: int = attr()
    fncall_id: int = attr()
    addr: str = attr()  # string that contains a hex value
    # `attr` must be called: assigning the bare function would make the function
    # object itself the field default instead of declaring an XML attribute.
    # NOTE(review): this binds to an attribute literally named `from_`; if the
    # log uses `from`, this needs attr(name="from") - TODO confirm.
    from_: str = attr()  # string that contains a hex value
class MonitorProcess(BaseXmlModel, tag="monitor_process"):
    """Model of one `monitor_process` XML element.

    Each field below is filled from the XML attribute of the same name.
    """

    ts: int = attr()
    process_id: int = attr()
    image_name: str = attr()
class MonitorThread(BaseXmlModel, tag="monitor_thread"):
    """Model of one `monitor_thread` XML element.

    Each field below is filled from the XML attribute of the same name.
    """

    ts: int = attr()
    thread_id: int = attr()
    process_id: int = attr()
    # kept as the raw string from the log
    os_tid: str = attr()  # TODO hex
class NewRegion(BaseXmlModel, tag="new_region"):
    """Model of one `new_region` XML element; every field is read from an XML attribute.

    The tag is declared explicitly, like the other event models in this module,
    and matches the tag under which the Analysis model collects these elements.
    """

    ts: int = attr()
    # the address-like fields are kept as the raw strings found in the log
    start_va: str = attr()
    end_va: str = attr()
    entry_point: str = attr()
class RemoveRegion(BaseXmlModel, tag="remove_region"):
    """Model of one `remove_region` XML element.

    Both fields are filled from the XML attributes of the same name.
    """

    ts: int = attr()
    region_id: int = attr()
class Analysis(BaseXmlModel, tag="analysis"):
    """Model of the `analysis` XML element.

    Metadata is read from the element's attributes; each list field collects
    the child elements carrying the tag named in its `element(tag=...)`.
    """

    # --- attributes of <analysis> ---
    log_version: str = attr()
    analyzer_version: str = attr()
    analysis_date: str = attr()

    # --- child elements, grouped by tag ---
    processes: List[MonitorProcess] = element(tag="monitor_process")
    threads: List[MonitorThread] = element(tag="monitor_thread")
    new_regions: List[NewRegion] = element(tag="new_region")
    remove_regions: List[RemoveRegion] = element(tag="remove_region")
    fncalls: List[FunctionCall] = element(tag="fncall")
    fnrets: List[FunctionReturn] = element(tag="fnret")