mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 15:49:46 -08:00
vmray: connect process, thread, and call
This commit is contained in:
@@ -5,10 +5,11 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Dict
|
from typing import Dict, List
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
from capa.exceptions import UnsupportedFormatError
|
from capa.exceptions import UnsupportedFormatError
|
||||||
from capa.features.extractors.vmray.models import File, Flog, SummaryV2, StaticData
|
from capa.features.extractors.vmray.models import File, Flog, SummaryV2, StaticData, FunctionCall
|
||||||
|
|
||||||
|
|
||||||
class VMRayAnalysis:
|
class VMRayAnalysis:
|
||||||
@@ -18,6 +19,8 @@ class VMRayAnalysis:
|
|||||||
self.exports: Dict[int, str] = {}
|
self.exports: Dict[int, str] = {}
|
||||||
self.imports: Dict[int, str] = {}
|
self.imports: Dict[int, str] = {}
|
||||||
self.sections: Dict[int, str] = {}
|
self.sections: Dict[int, str] = {}
|
||||||
|
self.process_threads: Dict[int, List[int]] = defaultdict(list)
|
||||||
|
self.process_calls: Dict[int, Dict[int, List[FunctionCall]]] = defaultdict(lambda: defaultdict(list))
|
||||||
self.base_address: int
|
self.base_address: int
|
||||||
|
|
||||||
self.sample_file_name: str
|
self.sample_file_name: str
|
||||||
@@ -28,6 +31,8 @@ class VMRayAnalysis:
|
|||||||
self._compute_base_address()
|
self._compute_base_address()
|
||||||
self._compute_exports()
|
self._compute_exports()
|
||||||
self._compute_sections()
|
self._compute_sections()
|
||||||
|
self._compute_process_threads()
|
||||||
|
self._compute_process_calls()
|
||||||
|
|
||||||
if not self.sample_file_static_data.pe:
|
if not self.sample_file_static_data.pe:
|
||||||
raise UnsupportedFormatError("VMRay feature extractor only supports PE at this time")
|
raise UnsupportedFormatError("VMRay feature extractor only supports PE at this time")
|
||||||
@@ -61,3 +66,18 @@ class VMRayAnalysis:
|
|||||||
if self.sample_file_static_data.pe:
|
if self.sample_file_static_data.pe:
|
||||||
for section in self.sample_file_static_data.pe.sections:
|
for section in self.sample_file_static_data.pe.sections:
|
||||||
self.sections[section.virtual_address] = section.name
|
self.sections[section.virtual_address] = section.name
|
||||||
|
|
||||||
|
def _compute_process_threads(self):
|
||||||
|
for function_call in self.flog.analysis.function_calls:
|
||||||
|
pid: int = int(function_call.process_id)
|
||||||
|
tid: int = int(function_call.thread_id)
|
||||||
|
|
||||||
|
if tid not in self.process_threads[pid]:
|
||||||
|
self.process_threads[pid].append(tid)
|
||||||
|
|
||||||
|
def _compute_process_calls(self):
|
||||||
|
for function_call in self.flog.analysis.function_calls:
|
||||||
|
pid: int = int(function_call.process_id)
|
||||||
|
tid: int = int(function_call.thread_id)
|
||||||
|
|
||||||
|
self.process_calls[pid][tid].append(function_call)
|
||||||
|
|||||||
@@ -21,6 +21,10 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
|
|||||||
yields: Feature, address; where Feature is either: API, Number, or String.
|
yields: Feature, address; where Feature is either: API, Number, or String.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# TODO update for new models
|
||||||
|
# print(ch)
|
||||||
|
return
|
||||||
|
|
||||||
# Extract API name
|
# Extract API name
|
||||||
yield API(ch.inner.name), ch.inner.address
|
yield API(ch.inner.name), ch.inner.address
|
||||||
|
|
||||||
|
|||||||
@@ -14,10 +14,11 @@ from zipfile import ZipFile
|
|||||||
import xmltodict
|
import xmltodict
|
||||||
|
|
||||||
import capa.helpers
|
import capa.helpers
|
||||||
|
import capa.features.extractors.vmray.call
|
||||||
import capa.features.extractors.vmray.file
|
import capa.features.extractors.vmray.file
|
||||||
import capa.features.extractors.vmray.global_
|
import capa.features.extractors.vmray.global_
|
||||||
from capa.features.common import Feature
|
from capa.features.common import Feature, Characteristic
|
||||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
from capa.features.address import NO_ADDRESS, Address, ThreadAddress, DynamicCallAddress, AbsoluteVirtualAddress
|
||||||
from capa.features.extractors.vmray import VMRayAnalysis
|
from capa.features.extractors.vmray import VMRayAnalysis
|
||||||
from capa.features.extractors.vmray.models import Flog, Process, SummaryV2
|
from capa.features.extractors.vmray.models import Flog, Process, SummaryV2
|
||||||
from capa.features.extractors.base_extractor import (
|
from capa.features.extractors.base_extractor import (
|
||||||
@@ -28,8 +29,6 @@ from capa.features.extractors.base_extractor import (
|
|||||||
DynamicFeatureExtractor,
|
DynamicFeatureExtractor,
|
||||||
)
|
)
|
||||||
|
|
||||||
# TODO also/or look into xmltodict?
|
|
||||||
|
|
||||||
|
|
||||||
class VMRayExtractor(DynamicFeatureExtractor):
|
class VMRayExtractor(DynamicFeatureExtractor):
|
||||||
def __init__(self, analysis: VMRayAnalysis):
|
def __init__(self, analysis: VMRayAnalysis):
|
||||||
@@ -68,23 +67,27 @@ class VMRayExtractor(DynamicFeatureExtractor):
|
|||||||
return process.image_name
|
return process.image_name
|
||||||
|
|
||||||
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
|
||||||
# TODO (meh)
|
for thread in self.analysis.process_threads[ph.address.pid]:
|
||||||
yield from []
|
address: ThreadAddress = ThreadAddress(process=ph.address, tid=thread)
|
||||||
|
yield ThreadHandle(address=address, inner={})
|
||||||
|
|
||||||
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
|
||||||
# force this routine to be a generator,
|
if False:
|
||||||
# but we don't actually have any elements to generate.
|
# force this routine to be a generator,
|
||||||
yield from []
|
# but we don't actually have any elements to generate.
|
||||||
|
yield Characteristic("never"), NO_ADDRESS
|
||||||
|
return
|
||||||
|
|
||||||
def get_calls(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle]:
|
def get_calls(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle]:
|
||||||
# TODO (meh)
|
for function_call in self.analysis.process_calls[ph.address.pid][th.address.tid]:
|
||||||
yield from []
|
addr = DynamicCallAddress(thread=th.address, id=int(function_call.fncall_id))
|
||||||
|
yield CallHandle(address=addr, inner=function_call)
|
||||||
|
|
||||||
def extract_call_features(
|
def extract_call_features(
|
||||||
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[Tuple[Feature, Address]]:
|
||||||
# TODO (meh)
|
# TODO (meh)
|
||||||
yield from []
|
yield from capa.features.extractors.vmray.call.extract_features(ph, th, ch)
|
||||||
|
|
||||||
def get_call_name(self, ph, th, ch) -> str:
|
def get_call_name(self, ph, th, ch) -> str:
|
||||||
# TODO (meh)
|
# TODO (meh)
|
||||||
|
|||||||
@@ -22,10 +22,10 @@ def get_processes(analysis: VMRayAnalysis) -> Iterator[ProcessHandle]:
|
|||||||
processes: Dict[str, Process] = analysis.sv2.processes
|
processes: Dict[str, Process] = analysis.sv2.processes
|
||||||
|
|
||||||
for _, process in processes.items():
|
for _, process in processes.items():
|
||||||
pid = process.os_pid
|
pid = process.monitor_id
|
||||||
ppid = processes[process.ref_parent_process.path[1]].os_pid if process.ref_parent_process else 0
|
ppid = processes[process.ref_parent_process.path[1]].monitor_id if process.ref_parent_process else 0
|
||||||
|
|
||||||
addr = ProcessAddress(pid=pid, ppid=ppid)
|
addr = ProcessAddress(pid=int(pid), ppid=int(ppid))
|
||||||
yield ProcessHandle(address=addr, inner=process)
|
yield ProcessHandle(address=addr, inner=process)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -235,34 +235,34 @@ def print_dynamic_features(processes, extractor: DynamicFeatureExtractor):
|
|||||||
|
|
||||||
print(f" proc: {extractor.get_process_name(p)}: {feature}")
|
print(f" proc: {extractor.get_process_name(p)}: {feature}")
|
||||||
|
|
||||||
for t in extractor.get_threads(p):
|
for t in extractor.get_threads(p):
|
||||||
print(f" thread: {t.address.tid}")
|
print(f" thread: {t.address.tid}")
|
||||||
for feature, addr in extractor.extract_thread_features(p, t):
|
for feature, addr in extractor.extract_thread_features(p, t):
|
||||||
|
if is_global_feature(feature):
|
||||||
|
continue
|
||||||
|
|
||||||
|
if feature != Feature(0):
|
||||||
|
print(f" {format_address(addr)}: {feature}")
|
||||||
|
|
||||||
|
for call in extractor.get_calls(p, t):
|
||||||
|
apis = []
|
||||||
|
arguments = []
|
||||||
|
for feature, addr in extractor.extract_call_features(p, t, call):
|
||||||
if is_global_feature(feature):
|
if is_global_feature(feature):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if feature != Feature(0):
|
if isinstance(feature, API):
|
||||||
print(f" {format_address(addr)}: {feature}")
|
assert isinstance(addr, capa.features.address.DynamicCallAddress)
|
||||||
|
apis.append((addr.id, str(feature.value)))
|
||||||
|
|
||||||
for call in extractor.get_calls(p, t):
|
if isinstance(feature, (Number, String)):
|
||||||
apis = []
|
arguments.append(str(feature.value))
|
||||||
arguments = []
|
|
||||||
for feature, addr in extractor.extract_call_features(p, t, call):
|
|
||||||
if is_global_feature(feature):
|
|
||||||
continue
|
|
||||||
|
|
||||||
if isinstance(feature, API):
|
# if not apis:
|
||||||
assert isinstance(addr, capa.features.address.DynamicCallAddress)
|
# print(f" arguments=[{', '.join(arguments)}]")
|
||||||
apis.append((addr.id, str(feature.value)))
|
|
||||||
|
|
||||||
if isinstance(feature, (Number, String)):
|
for cid, api in apis:
|
||||||
arguments.append(str(feature.value))
|
print(f" call {cid}: {api}({', '.join(arguments)})")
|
||||||
|
|
||||||
if not apis:
|
|
||||||
print(f" arguments=[{', '.join(arguments)}]")
|
|
||||||
|
|
||||||
for cid, api in apis:
|
|
||||||
print(f" call {cid}: {api}({', '.join(arguments)})")
|
|
||||||
|
|
||||||
|
|
||||||
def ida_main():
|
def ida_main():
|
||||||
|
|||||||
Reference in New Issue
Block a user