mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 07:40:38 -08:00
vmray: connect process, thread, and call
This commit is contained in:
@@ -5,10 +5,11 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
from typing import Dict
|
||||
from typing import Dict, List
|
||||
from collections import defaultdict
|
||||
|
||||
from capa.exceptions import UnsupportedFormatError
|
||||
from capa.features.extractors.vmray.models import File, Flog, SummaryV2, StaticData
|
||||
from capa.features.extractors.vmray.models import File, Flog, SummaryV2, StaticData, FunctionCall
|
||||
|
||||
|
||||
class VMRayAnalysis:
|
||||
@@ -18,6 +19,8 @@ class VMRayAnalysis:
|
||||
self.exports: Dict[int, str] = {}
|
||||
self.imports: Dict[int, str] = {}
|
||||
self.sections: Dict[int, str] = {}
|
||||
self.process_threads: Dict[int, List[int]] = defaultdict(list)
|
||||
self.process_calls: Dict[int, Dict[int, List[FunctionCall]]] = defaultdict(lambda: defaultdict(list))
|
||||
self.base_address: int
|
||||
|
||||
self.sample_file_name: str
|
||||
@@ -28,6 +31,8 @@ class VMRayAnalysis:
|
||||
self._compute_base_address()
|
||||
self._compute_exports()
|
||||
self._compute_sections()
|
||||
self._compute_process_threads()
|
||||
self._compute_process_calls()
|
||||
|
||||
if not self.sample_file_static_data.pe:
|
||||
raise UnsupportedFormatError("VMRay feature extractor only supports PE at this time")
|
||||
@@ -61,3 +66,18 @@ class VMRayAnalysis:
|
||||
if self.sample_file_static_data.pe:
|
||||
for section in self.sample_file_static_data.pe.sections:
|
||||
self.sections[section.virtual_address] = section.name
|
||||
|
||||
def _compute_process_threads(self):
    """
    Record, per process, the threads that made at least one function call.

    Walks every entry of `self.flog.analysis.function_calls` and fills
    `self.process_threads` (pid -> list of tids). Each tid is stored once per
    pid, in the order it is first encountered in the function log.
    """
    # Track already-recorded tids in per-process sets so the "seen before?"
    # check is O(1) instead of a linear scan of the list for every call.
    # Seed from whatever is already stored so existing entries are never duplicated.
    seen: Dict[int, set] = {pid: set(tids) for pid, tids in self.process_threads.items()}

    for function_call in self.flog.analysis.function_calls:
        # ids are normalized with int() because the mapping is keyed by integers
        pid: int = int(function_call.process_id)
        tid: int = int(function_call.thread_id)

        known = seen.setdefault(pid, set())
        if tid in known:
            continue

        known.add(tid)
        self.process_threads[pid].append(tid)
|
||||
|
||||
def _compute_process_calls(self):
    """
    Group the function log's calls by the process and thread that issued them.

    Appends each entry of `self.flog.analysis.function_calls` to
    `self.process_calls[pid][tid]`, keeping the log's original ordering
    within every (pid, tid) bucket.
    """
    calls_by_process = self.process_calls

    for call in self.flog.analysis.function_calls:
        # integer keys, matching the Dict[int, Dict[int, ...]] layout of process_calls
        owner_pid, owner_tid = int(call.process_id), int(call.thread_id)
        calls_by_process[owner_pid][owner_tid].append(call)
|
||||
|
||||
@@ -21,6 +21,10 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
|
||||
yields: Feature, address; where Feature is either: API, Number, or String.
|
||||
"""
|
||||
|
||||
# TODO update for new models
|
||||
# print(ch)
|
||||
return
|
||||
|
||||
# Extract API name
|
||||
yield API(ch.inner.name), ch.inner.address
|
||||
|
||||
|
||||
@@ -14,10 +14,11 @@ from zipfile import ZipFile
|
||||
import xmltodict
|
||||
|
||||
import capa.helpers
|
||||
import capa.features.extractors.vmray.call
|
||||
import capa.features.extractors.vmray.file
|
||||
import capa.features.extractors.vmray.global_
|
||||
from capa.features.common import Feature
|
||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
||||
from capa.features.common import Feature, Characteristic
|
||||
from capa.features.address import NO_ADDRESS, Address, ThreadAddress, DynamicCallAddress, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.vmray import VMRayAnalysis
|
||||
from capa.features.extractors.vmray.models import Flog, Process, SummaryV2
|
||||
from capa.features.extractors.base_extractor import (
|
||||
@@ -28,8 +29,6 @@ from capa.features.extractors.base_extractor import (
|
||||
DynamicFeatureExtractor,
|
||||
)
|
||||
|
||||
# TODO also/or look into xmltodict?
|
||||
|
||||
|
||||
class VMRayExtractor(DynamicFeatureExtractor):
|
||||
def __init__(self, analysis: VMRayAnalysis):
|
||||
@@ -68,23 +67,27 @@ class VMRayExtractor(DynamicFeatureExtractor):
|
||||
return process.image_name
|
||||
|
||||
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
||||
# TODO (meh)
|
||||
yield from []
|
||||
for thread in self.analysis.process_threads[ph.address.pid]:
|
||||
address: ThreadAddress = ThreadAddress(process=ph.address, tid=thread)
|
||||
yield ThreadHandle(address=address, inner={})
|
||||
|
||||
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||
# force this routine to be a generator,
|
||||
# but we don't actually have any elements to generate.
|
||||
yield from []
|
||||
if False:
|
||||
# force this routine to be a generator,
|
||||
# but we don't actually have any elements to generate.
|
||||
yield Characteristic("never"), NO_ADDRESS
|
||||
return
|
||||
|
||||
def get_calls(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle]:
|
||||
# TODO (meh)
|
||||
yield from []
|
||||
for function_call in self.analysis.process_calls[ph.address.pid][th.address.tid]:
|
||||
addr = DynamicCallAddress(thread=th.address, id=int(function_call.fncall_id))
|
||||
yield CallHandle(address=addr, inner=function_call)
|
||||
|
||||
def extract_call_features(
|
||||
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
||||
) -> Iterator[Tuple[Feature, Address]]:
|
||||
# TODO (meh)
|
||||
yield from []
|
||||
yield from capa.features.extractors.vmray.call.extract_features(ph, th, ch)
|
||||
|
||||
def get_call_name(self, ph, th, ch) -> str:
|
||||
# TODO (meh)
|
||||
|
||||
@@ -22,10 +22,10 @@ def get_processes(analysis: VMRayAnalysis) -> Iterator[ProcessHandle]:
|
||||
processes: Dict[str, Process] = analysis.sv2.processes
|
||||
|
||||
for _, process in processes.items():
|
||||
pid = process.os_pid
|
||||
ppid = processes[process.ref_parent_process.path[1]].os_pid if process.ref_parent_process else 0
|
||||
pid = process.monitor_id
|
||||
ppid = processes[process.ref_parent_process.path[1]].monitor_id if process.ref_parent_process else 0
|
||||
|
||||
addr = ProcessAddress(pid=pid, ppid=ppid)
|
||||
addr = ProcessAddress(pid=int(pid), ppid=int(ppid))
|
||||
yield ProcessHandle(address=addr, inner=process)
|
||||
|
||||
|
||||
|
||||
@@ -235,34 +235,34 @@ def print_dynamic_features(processes, extractor: DynamicFeatureExtractor):
|
||||
|
||||
print(f" proc: {extractor.get_process_name(p)}: {feature}")
|
||||
|
||||
for t in extractor.get_threads(p):
|
||||
print(f" thread: {t.address.tid}")
|
||||
for feature, addr in extractor.extract_thread_features(p, t):
|
||||
for t in extractor.get_threads(p):
|
||||
print(f" thread: {t.address.tid}")
|
||||
for feature, addr in extractor.extract_thread_features(p, t):
|
||||
if is_global_feature(feature):
|
||||
continue
|
||||
|
||||
if feature != Feature(0):
|
||||
print(f" {format_address(addr)}: {feature}")
|
||||
|
||||
for call in extractor.get_calls(p, t):
|
||||
apis = []
|
||||
arguments = []
|
||||
for feature, addr in extractor.extract_call_features(p, t, call):
|
||||
if is_global_feature(feature):
|
||||
continue
|
||||
|
||||
if feature != Feature(0):
|
||||
print(f" {format_address(addr)}: {feature}")
|
||||
if isinstance(feature, API):
|
||||
assert isinstance(addr, capa.features.address.DynamicCallAddress)
|
||||
apis.append((addr.id, str(feature.value)))
|
||||
|
||||
for call in extractor.get_calls(p, t):
|
||||
apis = []
|
||||
arguments = []
|
||||
for feature, addr in extractor.extract_call_features(p, t, call):
|
||||
if is_global_feature(feature):
|
||||
continue
|
||||
if isinstance(feature, (Number, String)):
|
||||
arguments.append(str(feature.value))
|
||||
|
||||
if isinstance(feature, API):
|
||||
assert isinstance(addr, capa.features.address.DynamicCallAddress)
|
||||
apis.append((addr.id, str(feature.value)))
|
||||
# if not apis:
|
||||
# print(f" arguments=[{', '.join(arguments)}]")
|
||||
|
||||
if isinstance(feature, (Number, String)):
|
||||
arguments.append(str(feature.value))
|
||||
|
||||
if not apis:
|
||||
print(f" arguments=[{', '.join(arguments)}]")
|
||||
|
||||
for cid, api in apis:
|
||||
print(f" call {cid}: {api}({', '.join(arguments)})")
|
||||
for cid, api in apis:
|
||||
print(f" call {cid}: {api}({', '.join(arguments)})")
|
||||
|
||||
|
||||
def ida_main():
|
||||
|
||||
Reference in New Issue
Block a user