vmray: connect process, thread, and call

This commit is contained in:
Mike Hunhoff
2024-06-20 13:05:32 -06:00
parent ec21f3b3fc
commit 19502efff3
5 changed files with 66 additions and 39 deletions

View File

@@ -5,10 +5,11 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License # Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License. # See the License for the specific language governing permissions and limitations under the License.
from typing import Dict from typing import Dict, List
from collections import defaultdict
from capa.exceptions import UnsupportedFormatError from capa.exceptions import UnsupportedFormatError
from capa.features.extractors.vmray.models import File, Flog, SummaryV2, StaticData from capa.features.extractors.vmray.models import File, Flog, SummaryV2, StaticData, FunctionCall
class VMRayAnalysis: class VMRayAnalysis:
@@ -18,6 +19,8 @@ class VMRayAnalysis:
self.exports: Dict[int, str] = {} self.exports: Dict[int, str] = {}
self.imports: Dict[int, str] = {} self.imports: Dict[int, str] = {}
self.sections: Dict[int, str] = {} self.sections: Dict[int, str] = {}
self.process_threads: Dict[int, List[int]] = defaultdict(list)
self.process_calls: Dict[int, Dict[int, List[FunctionCall]]] = defaultdict(lambda: defaultdict(list))
self.base_address: int self.base_address: int
self.sample_file_name: str self.sample_file_name: str
@@ -28,6 +31,8 @@ class VMRayAnalysis:
self._compute_base_address() self._compute_base_address()
self._compute_exports() self._compute_exports()
self._compute_sections() self._compute_sections()
self._compute_process_threads()
self._compute_process_calls()
if not self.sample_file_static_data.pe: if not self.sample_file_static_data.pe:
raise UnsupportedFormatError("VMRay feature extractor only supports PE at this time") raise UnsupportedFormatError("VMRay feature extractor only supports PE at this time")
@@ -61,3 +66,18 @@ class VMRayAnalysis:
if self.sample_file_static_data.pe: if self.sample_file_static_data.pe:
for section in self.sample_file_static_data.pe.sections: for section in self.sample_file_static_data.pe.sections:
self.sections[section.virtual_address] = section.name self.sections[section.virtual_address] = section.name
def _compute_process_threads(self):
    """Record, for each process ID, the thread IDs seen in the function log.

    Walks ``self.flog.analysis.function_calls`` and appends every thread ID
    to ``self.process_threads[pid]`` the first time it is seen for that
    process, so each list keeps first-seen order without duplicates.
    Process and thread IDs are converted with ``int()`` before being stored.
    """
    # Per-process set of thread IDs already recorded, so the duplicate check
    # does not rescan the list for every logged call. Each set is seeded from
    # the existing list so that calling this method again adds no duplicates.
    seen = {}
    for function_call in self.flog.analysis.function_calls:
        pid = int(function_call.process_id)
        tid = int(function_call.thread_id)

        if pid not in seen:
            seen[pid] = set(self.process_threads[pid])

        if tid not in seen[pid]:
            seen[pid].add(tid)
            self.process_threads[pid].append(tid)
def _compute_process_calls(self):
    """Group the logged function calls by process ID and thread ID.

    Walks ``self.flog.analysis.function_calls`` and appends each call object
    to ``self.process_calls[pid][tid]``, keeping the order in which the calls
    appear in the log. Process and thread IDs are converted with ``int()``
    before being used as keys.
    """
    for function_call in self.flog.analysis.function_calls:
        pid = int(function_call.process_id)
        tid = int(function_call.thread_id)

        # process_calls is expected to create the nested containers on first
        # access (it is built as a defaultdict of defaultdict(list)).
        thread_calls = self.process_calls[pid][tid]
        thread_calls.append(function_call)

View File

@@ -21,6 +21,10 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
yields: Feature, address; where Feature is either: API, Number, or String. yields: Feature, address; where Feature is either: API, Number, or String.
""" """
# TODO update for new models
# print(ch)
return
# Extract API name # Extract API name
yield API(ch.inner.name), ch.inner.address yield API(ch.inner.name), ch.inner.address

View File

@@ -14,10 +14,11 @@ from zipfile import ZipFile
import xmltodict import xmltodict
import capa.helpers import capa.helpers
import capa.features.extractors.vmray.call
import capa.features.extractors.vmray.file import capa.features.extractors.vmray.file
import capa.features.extractors.vmray.global_ import capa.features.extractors.vmray.global_
from capa.features.common import Feature from capa.features.common import Feature, Characteristic
from capa.features.address import Address, AbsoluteVirtualAddress from capa.features.address import NO_ADDRESS, Address, ThreadAddress, DynamicCallAddress, AbsoluteVirtualAddress
from capa.features.extractors.vmray import VMRayAnalysis from capa.features.extractors.vmray import VMRayAnalysis
from capa.features.extractors.vmray.models import Flog, Process, SummaryV2 from capa.features.extractors.vmray.models import Flog, Process, SummaryV2
from capa.features.extractors.base_extractor import ( from capa.features.extractors.base_extractor import (
@@ -28,8 +29,6 @@ from capa.features.extractors.base_extractor import (
DynamicFeatureExtractor, DynamicFeatureExtractor,
) )
# TODO also/or look into xmltodict?
class VMRayExtractor(DynamicFeatureExtractor): class VMRayExtractor(DynamicFeatureExtractor):
def __init__(self, analysis: VMRayAnalysis): def __init__(self, analysis: VMRayAnalysis):
@@ -68,23 +67,27 @@ class VMRayExtractor(DynamicFeatureExtractor):
return process.image_name return process.image_name
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]: def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
# TODO (meh) for thread in self.analysis.process_threads[ph.address.pid]:
yield from [] address: ThreadAddress = ThreadAddress(process=ph.address, tid=thread)
yield ThreadHandle(address=address, inner={})
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]: def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
# force this routine to be a generator, if False:
# but we don't actually have any elements to generate. # force this routine to be a generator,
yield from [] # but we don't actually have any elements to generate.
yield Characteristic("never"), NO_ADDRESS
return
def get_calls(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle]: def get_calls(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle]:
# TODO (meh) for function_call in self.analysis.process_calls[ph.address.pid][th.address.tid]:
yield from [] addr = DynamicCallAddress(thread=th.address, id=int(function_call.fncall_id))
yield CallHandle(address=addr, inner=function_call)
def extract_call_features( def extract_call_features(
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
) -> Iterator[Tuple[Feature, Address]]: ) -> Iterator[Tuple[Feature, Address]]:
# TODO (meh) # TODO (meh)
yield from [] yield from capa.features.extractors.vmray.call.extract_features(ph, th, ch)
def get_call_name(self, ph, th, ch) -> str: def get_call_name(self, ph, th, ch) -> str:
# TODO (meh) # TODO (meh)

View File

@@ -22,10 +22,10 @@ def get_processes(analysis: VMRayAnalysis) -> Iterator[ProcessHandle]:
processes: Dict[str, Process] = analysis.sv2.processes processes: Dict[str, Process] = analysis.sv2.processes
for _, process in processes.items(): for _, process in processes.items():
pid = process.os_pid pid = process.monitor_id
ppid = processes[process.ref_parent_process.path[1]].os_pid if process.ref_parent_process else 0 ppid = processes[process.ref_parent_process.path[1]].monitor_id if process.ref_parent_process else 0
addr = ProcessAddress(pid=pid, ppid=ppid) addr = ProcessAddress(pid=int(pid), ppid=int(ppid))
yield ProcessHandle(address=addr, inner=process) yield ProcessHandle(address=addr, inner=process)

View File

@@ -235,34 +235,34 @@ def print_dynamic_features(processes, extractor: DynamicFeatureExtractor):
print(f" proc: {extractor.get_process_name(p)}: {feature}") print(f" proc: {extractor.get_process_name(p)}: {feature}")
for t in extractor.get_threads(p): for t in extractor.get_threads(p):
print(f" thread: {t.address.tid}") print(f" thread: {t.address.tid}")
for feature, addr in extractor.extract_thread_features(p, t): for feature, addr in extractor.extract_thread_features(p, t):
if is_global_feature(feature):
continue
if feature != Feature(0):
print(f" {format_address(addr)}: {feature}")
for call in extractor.get_calls(p, t):
apis = []
arguments = []
for feature, addr in extractor.extract_call_features(p, t, call):
if is_global_feature(feature): if is_global_feature(feature):
continue continue
if feature != Feature(0): if isinstance(feature, API):
print(f" {format_address(addr)}: {feature}") assert isinstance(addr, capa.features.address.DynamicCallAddress)
apis.append((addr.id, str(feature.value)))
for call in extractor.get_calls(p, t): if isinstance(feature, (Number, String)):
apis = [] arguments.append(str(feature.value))
arguments = []
for feature, addr in extractor.extract_call_features(p, t, call):
if is_global_feature(feature):
continue
if isinstance(feature, API): # if not apis:
assert isinstance(addr, capa.features.address.DynamicCallAddress) # print(f" arguments=[{', '.join(arguments)}]")
apis.append((addr.id, str(feature.value)))
if isinstance(feature, (Number, String)): for cid, api in apis:
arguments.append(str(feature.value)) print(f" call {cid}: {api}({', '.join(arguments)})")
if not apis:
print(f" arguments=[{', '.join(arguments)}]")
for cid, api in apis:
print(f" call {cid}: {api}({', '.join(arguments)})")
def ida_main(): def ida_main():