vmray: connect process, thread, and call

This commit is contained in:
Mike Hunhoff
2024-06-20 13:05:32 -06:00
parent ec21f3b3fc
commit 19502efff3
5 changed files with 66 additions and 39 deletions

View File

@@ -5,10 +5,11 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
from typing import Dict
from typing import Dict, List
from collections import defaultdict
from capa.exceptions import UnsupportedFormatError
from capa.features.extractors.vmray.models import File, Flog, SummaryV2, StaticData
from capa.features.extractors.vmray.models import File, Flog, SummaryV2, StaticData, FunctionCall
class VMRayAnalysis:
@@ -18,6 +19,8 @@ class VMRayAnalysis:
self.exports: Dict[int, str] = {}
self.imports: Dict[int, str] = {}
self.sections: Dict[int, str] = {}
self.process_threads: Dict[int, List[int]] = defaultdict(list)
self.process_calls: Dict[int, Dict[int, List[FunctionCall]]] = defaultdict(lambda: defaultdict(list))
self.base_address: int
self.sample_file_name: str
@@ -28,6 +31,8 @@ class VMRayAnalysis:
self._compute_base_address()
self._compute_exports()
self._compute_sections()
self._compute_process_threads()
self._compute_process_calls()
if not self.sample_file_static_data.pe:
raise UnsupportedFormatError("VMRay feature extractor only supports PE at this time")
@@ -61,3 +66,18 @@ class VMRayAnalysis:
if self.sample_file_static_data.pe:
for section in self.sample_file_static_data.pe.sections:
self.sections[section.virtual_address] = section.name
def _compute_process_threads(self):
    """Record which thread ids were observed for each process in the function log.

    Iterates ``self.flog.analysis.function_calls`` and appends each call's thread id
    to ``self.process_threads[pid]`` the first time it is seen for that process.
    Thread ids therefore keep their first-seen order and are never duplicated.
    """
    # pid -> set of thread ids already present in self.process_threads[pid].
    # A set makes the "already recorded?" check constant time instead of a list
    # scan on every logged call.
    seen: dict = {}

    for function_call in self.flog.analysis.function_calls:
        pid: int = int(function_call.process_id)
        tid: int = int(function_call.thread_id)

        known = seen.get(pid)
        if known is None:
            # seed from whatever is already recorded so that running this
            # routine again does not append duplicates
            known = seen[pid] = set(self.process_threads[pid])

        if tid not in known:
            known.add(tid)
            self.process_threads[pid].append(tid)
def _compute_process_calls(self):
    """Group the logged function calls by process id, then by thread id.

    Every entry of ``self.flog.analysis.function_calls`` is appended, in log order,
    to ``self.process_calls[pid][tid]``, where both ids are converted with ``int()``.
    """
    for call in self.flog.analysis.function_calls:
        # convert both ids before touching the mapping
        process_id, thread_id = int(call.process_id), int(call.thread_id)
        thread_calls = self.process_calls[process_id][thread_id]
        thread_calls.append(call)

View File

@@ -21,6 +21,10 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
yields: Feature, address; where Feature is either: API, Number, or String.
"""
# TODO update for new models
# print(ch)
return
# Extract API name
yield API(ch.inner.name), ch.inner.address

View File

@@ -14,10 +14,11 @@ from zipfile import ZipFile
import xmltodict
import capa.helpers
import capa.features.extractors.vmray.call
import capa.features.extractors.vmray.file
import capa.features.extractors.vmray.global_
from capa.features.common import Feature
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.common import Feature, Characteristic
from capa.features.address import NO_ADDRESS, Address, ThreadAddress, DynamicCallAddress, AbsoluteVirtualAddress
from capa.features.extractors.vmray import VMRayAnalysis
from capa.features.extractors.vmray.models import Flog, Process, SummaryV2
from capa.features.extractors.base_extractor import (
@@ -28,8 +29,6 @@ from capa.features.extractors.base_extractor import (
DynamicFeatureExtractor,
)
# TODO also/or look into xmltodict?
class VMRayExtractor(DynamicFeatureExtractor):
def __init__(self, analysis: VMRayAnalysis):
@@ -68,23 +67,27 @@ class VMRayExtractor(DynamicFeatureExtractor):
return process.image_name
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
    """Yield a ThreadHandle for each thread id recorded for the given process.

    Thread ids are read from ``self.analysis.process_threads``, keyed by
    ``ph.address.pid``. Each handle carries a ThreadAddress built from the
    process address and the thread id; its ``inner`` payload is an empty dict.
    """
    for thread in self.analysis.process_threads[ph.address.pid]:
        address: ThreadAddress = ThreadAddress(process=ph.address, tid=thread)
        yield ThreadHandle(address=address, inner={})
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
    """Yield thread-scope features; this extractor currently produces none.

    The result is always an empty generator, regardless of ``ph`` and ``th``.
    """
    if False:
        # force this routine to be a generator,
        # but we don't actually have any elements to generate.
        yield Characteristic("never"), NO_ADDRESS
    return
def get_calls(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle]:
    """Yield a CallHandle for each function call recorded for the given thread.

    Calls are read from ``self.analysis.process_calls``, keyed first by
    ``ph.address.pid`` and then by ``th.address.tid``. Each handle's address is a
    DynamicCallAddress made from the thread address and the call's ``fncall_id``
    (converted with ``int()``); the logged call object itself is the ``inner`` payload.
    """
    for function_call in self.analysis.process_calls[ph.address.pid][th.address.tid]:
        addr = DynamicCallAddress(thread=th.address, id=int(function_call.fncall_id))
        yield CallHandle(address=addr, inner=function_call)
def extract_call_features(
    self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
) -> Iterator[Tuple[Feature, Address]]:
    """Yield the (feature, address) pairs extracted for a single call.

    Delegates to ``capa.features.extractors.vmray.call.extract_features`` with the
    same process, thread, and call handles.
    """
    yield from capa.features.extractors.vmray.call.extract_features(ph, th, ch)
def get_call_name(self, ph, th, ch) -> str:
# TODO (meh)

View File

@@ -22,10 +22,10 @@ def get_processes(analysis: VMRayAnalysis) -> Iterator[ProcessHandle]:
processes: Dict[str, Process] = analysis.sv2.processes
for _, process in processes.items():
pid = process.os_pid
ppid = processes[process.ref_parent_process.path[1]].os_pid if process.ref_parent_process else 0
pid = process.monitor_id
ppid = processes[process.ref_parent_process.path[1]].monitor_id if process.ref_parent_process else 0
addr = ProcessAddress(pid=pid, ppid=ppid)
addr = ProcessAddress(pid=int(pid), ppid=int(ppid))
yield ProcessHandle(address=addr, inner=process)

View File

@@ -235,34 +235,34 @@ def print_dynamic_features(processes, extractor: DynamicFeatureExtractor):
print(f" proc: {extractor.get_process_name(p)}: {feature}")
for t in extractor.get_threads(p):
print(f" thread: {t.address.tid}")
for feature, addr in extractor.extract_thread_features(p, t):
for t in extractor.get_threads(p):
print(f" thread: {t.address.tid}")
for feature, addr in extractor.extract_thread_features(p, t):
if is_global_feature(feature):
continue
if feature != Feature(0):
print(f" {format_address(addr)}: {feature}")
for call in extractor.get_calls(p, t):
apis = []
arguments = []
for feature, addr in extractor.extract_call_features(p, t, call):
if is_global_feature(feature):
continue
if feature != Feature(0):
print(f" {format_address(addr)}: {feature}")
if isinstance(feature, API):
assert isinstance(addr, capa.features.address.DynamicCallAddress)
apis.append((addr.id, str(feature.value)))
for call in extractor.get_calls(p, t):
apis = []
arguments = []
for feature, addr in extractor.extract_call_features(p, t, call):
if is_global_feature(feature):
continue
if isinstance(feature, (Number, String)):
arguments.append(str(feature.value))
if isinstance(feature, API):
assert isinstance(addr, capa.features.address.DynamicCallAddress)
apis.append((addr.id, str(feature.value)))
# if not apis:
# print(f" arguments=[{', '.join(arguments)}]")
if isinstance(feature, (Number, String)):
arguments.append(str(feature.value))
if not apis:
print(f" arguments=[{', '.join(arguments)}]")
for cid, api in apis:
print(f" call {cid}: {api}({', '.join(arguments)})")
for cid, api in apis:
print(f" call {cid}: {api}({', '.join(arguments)})")
def ida_main():