mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 23:59:48 -08:00
Compare commits
6 Commits
backend/py
...
bn-cache-i
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
99daa6370d | ||
|
|
61e1684783 | ||
|
|
73f56f585c | ||
|
|
319dbfe299 | ||
|
|
a909d022a6 | ||
|
|
999f91baa1 |
@@ -5,15 +5,19 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
import logging
|
||||||
from typing import Iterator
|
from typing import Iterator
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
import binaryninja as binja
|
import binaryninja as binja
|
||||||
from binaryninja import ILException
|
from binaryninja import Function, BinaryView, SymbolType, ILException, RegisterValueType, LowLevelILOperation
|
||||||
|
|
||||||
|
import capa.perf
|
||||||
import capa.features.extractors.elf
|
import capa.features.extractors.elf
|
||||||
import capa.features.extractors.binja.file
|
import capa.features.extractors.binja.file
|
||||||
import capa.features.extractors.binja.insn
|
import capa.features.extractors.binja.insn
|
||||||
import capa.features.extractors.binja.global_
|
import capa.features.extractors.binja.global_
|
||||||
|
import capa.features.extractors.binja.helpers
|
||||||
import capa.features.extractors.binja.function
|
import capa.features.extractors.binja.function
|
||||||
import capa.features.extractors.binja.basicblock
|
import capa.features.extractors.binja.basicblock
|
||||||
from capa.features.common import Feature
|
from capa.features.common import Feature
|
||||||
@@ -26,6 +30,8 @@ from capa.features.extractors.base_extractor import (
|
|||||||
StaticFeatureExtractor,
|
StaticFeatureExtractor,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class BinjaFeatureExtractor(StaticFeatureExtractor):
|
class BinjaFeatureExtractor(StaticFeatureExtractor):
|
||||||
def __init__(self, bv: binja.BinaryView):
|
def __init__(self, bv: binja.BinaryView):
|
||||||
@@ -36,6 +42,9 @@ class BinjaFeatureExtractor(StaticFeatureExtractor):
|
|||||||
self.global_features.extend(capa.features.extractors.binja.global_.extract_os(self.bv))
|
self.global_features.extend(capa.features.extractors.binja.global_.extract_os(self.bv))
|
||||||
self.global_features.extend(capa.features.extractors.binja.global_.extract_arch(self.bv))
|
self.global_features.extend(capa.features.extractors.binja.global_.extract_arch(self.bv))
|
||||||
|
|
||||||
|
with capa.perf.timing("binary ninja: computing call graph"):
|
||||||
|
self._call_graph = self._build_call_graph()
|
||||||
|
|
||||||
def get_base_address(self):
|
def get_base_address(self):
|
||||||
return AbsoluteVirtualAddress(self.bv.start)
|
return AbsoluteVirtualAddress(self.bv.start)
|
||||||
|
|
||||||
@@ -45,9 +54,27 @@ class BinjaFeatureExtractor(StaticFeatureExtractor):
|
|||||||
def extract_file_features(self):
|
def extract_file_features(self):
|
||||||
yield from capa.features.extractors.binja.file.extract_features(self.bv)
|
yield from capa.features.extractors.binja.file.extract_features(self.bv)
|
||||||
|
|
||||||
|
def _build_call_graph(self):
|
||||||
|
# from function address to function addresses
|
||||||
|
calls_from: defaultdict[int, set[int]] = defaultdict(set)
|
||||||
|
calls_to: defaultdict[int, set[int]] = defaultdict(set)
|
||||||
|
|
||||||
|
f: Function
|
||||||
|
for f in self.bv.functions:
|
||||||
|
for caller in f.callers:
|
||||||
|
calls_from[caller.start].add(f.start)
|
||||||
|
calls_to[f.start].add(caller.start)
|
||||||
|
|
||||||
|
call_graph = {
|
||||||
|
"calls_to": calls_to,
|
||||||
|
"calls_from": calls_from,
|
||||||
|
}
|
||||||
|
|
||||||
|
return call_graph
|
||||||
|
|
||||||
def get_functions(self) -> Iterator[FunctionHandle]:
|
def get_functions(self) -> Iterator[FunctionHandle]:
|
||||||
for f in self.bv.functions:
|
for f in self.bv.functions:
|
||||||
yield FunctionHandle(address=AbsoluteVirtualAddress(f.start), inner=f)
|
yield FunctionHandle(address=AbsoluteVirtualAddress(f.start), inner=f, ctx={"call_graph": self._call_graph})
|
||||||
|
|
||||||
def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
yield from capa.features.extractors.binja.function.extract_features(fh)
|
yield from capa.features.extractors.binja.function.extract_features(fh)
|
||||||
@@ -76,13 +103,16 @@ class BinjaFeatureExtractor(StaticFeatureExtractor):
|
|||||||
yield from capa.features.extractors.binja.basicblock.extract_features(fh, bbh)
|
yield from capa.features.extractors.binja.basicblock.extract_features(fh, bbh)
|
||||||
|
|
||||||
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
|
||||||
import capa.features.extractors.binja.helpers as binja_helpers
|
f: binja.Function = fh.inner
|
||||||
|
|
||||||
bb: tuple[binja.BasicBlock, binja.MediumLevelILBasicBlock] = bbh.inner
|
bb: binja.BasicBlock
|
||||||
addr = bb[0].start
|
mlbb: binja.MediumLevelILBasicBlock
|
||||||
|
bb, mlbb = bbh.inner
|
||||||
|
|
||||||
for text, length in bb[0]:
|
addr: int = bb.start
|
||||||
insn = binja_helpers.DisassemblyInstruction(addr, length, text)
|
for text, length in bb:
|
||||||
|
llil = f.get_llils_at(addr)
|
||||||
|
insn = capa.features.extractors.binja.helpers.DisassemblyInstruction(addr, length, text, llil)
|
||||||
yield InsnHandle(address=AbsoluteVirtualAddress(addr), inner=insn)
|
yield InsnHandle(address=AbsoluteVirtualAddress(addr), inner=insn)
|
||||||
addr += length
|
addr += length
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
from binaryninja import Function, BinaryView, SymbolType, ILException, RegisterValueType, LowLevelILOperation
|
from binaryninja import Function, BinaryView, SymbolType
|
||||||
|
|
||||||
from capa.features.file import FunctionName
|
from capa.features.file import FunctionName
|
||||||
from capa.features.common import Feature, Characteristic
|
from capa.features.common import Feature, Characteristic
|
||||||
@@ -20,38 +20,24 @@ def extract_function_calls_to(fh: FunctionHandle):
|
|||||||
"""extract callers to a function"""
|
"""extract callers to a function"""
|
||||||
func: Function = fh.inner
|
func: Function = fh.inner
|
||||||
|
|
||||||
for caller in func.caller_sites:
|
caller: int
|
||||||
# Everything that is a code reference to the current function is considered a caller, which actually includes
|
for caller in fh.ctx["call_graph"]["calls_to"].get(func.start, []):
|
||||||
# many other references that are NOT a caller. For example, an instruction `push function_start` will also be
|
if caller == func.start:
|
||||||
# considered a caller to the function
|
|
||||||
llil = None
|
|
||||||
try:
|
|
||||||
# Temporary fix for https://github.com/Vector35/binaryninja-api/issues/6020. Since `.llil` can throw an
|
|
||||||
# exception rather than returning None
|
|
||||||
llil = caller.llil
|
|
||||||
except ILException:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if (llil is None) or llil.operation not in [
|
yield Characteristic("calls to"), AbsoluteVirtualAddress(caller)
|
||||||
LowLevelILOperation.LLIL_CALL,
|
|
||||||
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
|
|
||||||
LowLevelILOperation.LLIL_JUMP,
|
def extract_function_calls_from(fh: FunctionHandle):
|
||||||
LowLevelILOperation.LLIL_TAILCALL,
|
"""extract callers from a function"""
|
||||||
]:
|
func: Function = fh.inner
|
||||||
|
|
||||||
|
callee: int
|
||||||
|
for callee in fh.ctx["call_graph"]["calls_from"].get(func.start, []):
|
||||||
|
if callee == func.start:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if llil.dest.value.type not in [
|
yield Characteristic("calls from"), AbsoluteVirtualAddress(callee)
|
||||||
RegisterValueType.ImportedAddressValue,
|
|
||||||
RegisterValueType.ConstantValue,
|
|
||||||
RegisterValueType.ConstantPointerValue,
|
|
||||||
]:
|
|
||||||
continue
|
|
||||||
|
|
||||||
address = llil.dest.value.value
|
|
||||||
if address != func.start:
|
|
||||||
continue
|
|
||||||
|
|
||||||
yield Characteristic("calls to"), AbsoluteVirtualAddress(caller.address)
|
|
||||||
|
|
||||||
|
|
||||||
def extract_function_loop(fh: FunctionHandle):
|
def extract_function_loop(fh: FunctionHandle):
|
||||||
@@ -72,13 +58,12 @@ def extract_function_loop(fh: FunctionHandle):
|
|||||||
def extract_recursive_call(fh: FunctionHandle):
|
def extract_recursive_call(fh: FunctionHandle):
|
||||||
"""extract recursive function call"""
|
"""extract recursive function call"""
|
||||||
func: Function = fh.inner
|
func: Function = fh.inner
|
||||||
bv: BinaryView = func.view
|
|
||||||
if bv is None:
|
|
||||||
return
|
|
||||||
|
|
||||||
for ref in bv.get_code_refs(func.start):
|
caller: int
|
||||||
if ref.function == func:
|
for caller in fh.ctx["call_graph"]["calls_to"].get(func.start, []):
|
||||||
|
if caller == func.start:
|
||||||
yield Characteristic("recursive call"), fh.address
|
yield Characteristic("recursive call"), fh.address
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
def extract_function_name(fh: FunctionHandle):
|
def extract_function_name(fh: FunctionHandle):
|
||||||
@@ -108,4 +93,10 @@ def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
|||||||
yield feature, addr
|
yield feature, addr
|
||||||
|
|
||||||
|
|
||||||
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call, extract_function_name)
|
FUNCTION_HANDLERS = (
|
||||||
|
extract_function_calls_to,
|
||||||
|
extract_function_calls_from,
|
||||||
|
extract_function_loop,
|
||||||
|
extract_recursive_call,
|
||||||
|
extract_function_name,
|
||||||
|
)
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import re
|
|||||||
from typing import Callable
|
from typing import Callable
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from binaryninja import BinaryView, LowLevelILInstruction
|
from binaryninja import BinaryView, LowLevelILOperation, LowLevelILInstruction
|
||||||
from binaryninja.architecture import InstructionTextToken
|
from binaryninja.architecture import InstructionTextToken
|
||||||
|
|
||||||
|
|
||||||
@@ -18,6 +18,24 @@ class DisassemblyInstruction:
|
|||||||
address: int
|
address: int
|
||||||
length: int
|
length: int
|
||||||
text: list[InstructionTextToken]
|
text: list[InstructionTextToken]
|
||||||
|
llil: list[LowLevelILInstruction]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_call(self):
|
||||||
|
if not self.llil:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# TODO(williballenthin): when to use one vs many llil instructions
|
||||||
|
# https://github.com/Vector35/binaryninja-api/issues/6205
|
||||||
|
llil = self.llil[0]
|
||||||
|
if not llil:
|
||||||
|
return False
|
||||||
|
|
||||||
|
return llil.operation in [
|
||||||
|
LowLevelILOperation.LLIL_CALL,
|
||||||
|
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
|
||||||
|
LowLevelILOperation.LLIL_TAILCALL,
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
LLIL_VISITOR = Callable[[LowLevelILInstruction, LowLevelILInstruction, int], bool]
|
LLIL_VISITOR = Callable[[LowLevelILInstruction, LowLevelILInstruction, int], bool]
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ from binaryninja import (
|
|||||||
import capa.features.extractors.helpers
|
import capa.features.extractors.helpers
|
||||||
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
|
||||||
from capa.features.common import MAX_BYTES_FEATURE_SIZE, Bytes, String, Feature, Characteristic
|
from capa.features.common import MAX_BYTES_FEATURE_SIZE, Bytes, String, Feature, Characteristic
|
||||||
from capa.features.address import Address, AbsoluteVirtualAddress
|
from capa.features.address import Address
|
||||||
from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs
|
from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs
|
||||||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
|
||||||
|
|
||||||
@@ -32,6 +32,7 @@ from capa.features.extractors.base_extractor import BBHandle, InsnHandle, Functi
|
|||||||
SECURITY_COOKIE_BYTES_DELTA = 0x40
|
SECURITY_COOKIE_BYTES_DELTA = 0x40
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: move this to call graph pass
|
||||||
# check if a function is a stub function to another function/symbol. The criteria is:
|
# check if a function is a stub function to another function/symbol. The criteria is:
|
||||||
# 1. The function must only have one basic block
|
# 1. The function must only have one basic block
|
||||||
# 2. The function must only make one call/jump to another address
|
# 2. The function must only make one call/jump to another address
|
||||||
@@ -82,8 +83,9 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
|
|||||||
"""
|
"""
|
||||||
func: Function = fh.inner
|
func: Function = fh.inner
|
||||||
bv: BinaryView = func.view
|
bv: BinaryView = func.view
|
||||||
|
insn: DisassemblyInstruction = ih.inner
|
||||||
|
|
||||||
for llil in func.get_llils_at(ih.address):
|
for llil in insn.llil:
|
||||||
if llil.operation in [
|
if llil.operation in [
|
||||||
LowLevelILOperation.LLIL_CALL,
|
LowLevelILOperation.LLIL_CALL,
|
||||||
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
|
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
|
||||||
@@ -138,10 +140,11 @@ def extract_insn_number_features(
|
|||||||
example:
|
example:
|
||||||
push 3136B0h ; dwControlCode
|
push 3136B0h ; dwControlCode
|
||||||
"""
|
"""
|
||||||
func: Function = fh.inner
|
insn: DisassemblyInstruction = ih.inner
|
||||||
|
|
||||||
results: list[tuple[Any[Number, OperandNumber], Address]] = []
|
results: list[tuple[Any[Number, OperandNumber], Address]] = []
|
||||||
|
|
||||||
|
# TODO: try to move this out of line
|
||||||
def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
|
def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
|
||||||
if il.operation == LowLevelILOperation.LLIL_LOAD:
|
if il.operation == LowLevelILOperation.LLIL_LOAD:
|
||||||
return False
|
return False
|
||||||
@@ -165,7 +168,7 @@ def extract_insn_number_features(
|
|||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
for llil in func.get_llils_at(ih.address):
|
for llil in insn.llil:
|
||||||
visit_llil_exprs(llil, llil_checker)
|
visit_llil_exprs(llil, llil_checker)
|
||||||
|
|
||||||
yield from results
|
yield from results
|
||||||
@@ -179,11 +182,11 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
|||||||
"""
|
"""
|
||||||
func: Function = fh.inner
|
func: Function = fh.inner
|
||||||
bv: BinaryView = func.view
|
bv: BinaryView = func.view
|
||||||
|
insn: DisassemblyInstruction = ih.inner
|
||||||
|
|
||||||
candidate_addrs = set()
|
candidate_addrs = set()
|
||||||
|
|
||||||
llil = func.get_llil_at(ih.address)
|
if insn.is_call:
|
||||||
if llil is None or llil.operation in [LowLevelILOperation.LLIL_CALL, LowLevelILOperation.LLIL_CALL_STACK_ADJUST]:
|
|
||||||
return
|
return
|
||||||
|
|
||||||
for ref in bv.get_code_refs_from(ih.address):
|
for ref in bv.get_code_refs_from(ih.address):
|
||||||
@@ -205,7 +208,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
|
|||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
for llil in func.get_llils_at(ih.address):
|
for llil in insn.llil:
|
||||||
visit_llil_exprs(llil, llil_checker)
|
visit_llil_exprs(llil, llil_checker)
|
||||||
|
|
||||||
for addr in candidate_addrs:
|
for addr in candidate_addrs:
|
||||||
@@ -227,6 +230,7 @@ def extract_insn_string_features(
|
|||||||
"""
|
"""
|
||||||
func: Function = fh.inner
|
func: Function = fh.inner
|
||||||
bv: BinaryView = func.view
|
bv: BinaryView = func.view
|
||||||
|
insn: DisassemblyInstruction = ih.inner
|
||||||
|
|
||||||
candidate_addrs = set()
|
candidate_addrs = set()
|
||||||
|
|
||||||
@@ -250,7 +254,7 @@ def extract_insn_string_features(
|
|||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
for llil in func.get_llils_at(ih.address):
|
for llil in insn.llil:
|
||||||
visit_llil_exprs(llil, llil_checker)
|
visit_llil_exprs(llil, llil_checker)
|
||||||
|
|
||||||
# Now we have all the candidate address, check them for string or pointer to string
|
# Now we have all the candidate address, check them for string or pointer to string
|
||||||
@@ -283,6 +287,7 @@ def extract_insn_offset_features(
|
|||||||
.text:0040112F cmp [esi+4], ebx
|
.text:0040112F cmp [esi+4], ebx
|
||||||
"""
|
"""
|
||||||
func: Function = fh.inner
|
func: Function = fh.inner
|
||||||
|
insn: DisassemblyInstruction = ih.inner
|
||||||
|
|
||||||
results: list[tuple[Any[Offset, OperandOffset], Address]] = []
|
results: list[tuple[Any[Offset, OperandOffset], Address]] = []
|
||||||
address_size = func.view.arch.address_size * 8
|
address_size = func.view.arch.address_size * 8
|
||||||
@@ -327,7 +332,7 @@ def extract_insn_offset_features(
|
|||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
for llil in func.get_llils_at(ih.address):
|
for llil in insn.llil:
|
||||||
visit_llil_exprs(llil, llil_checker)
|
visit_llil_exprs(llil, llil_checker)
|
||||||
|
|
||||||
yield from results
|
yield from results
|
||||||
@@ -367,7 +372,7 @@ def extract_insn_nzxor_characteristic_features(
|
|||||||
parse instruction non-zeroing XOR instruction
|
parse instruction non-zeroing XOR instruction
|
||||||
ignore expected non-zeroing XORs, e.g. security cookies
|
ignore expected non-zeroing XORs, e.g. security cookies
|
||||||
"""
|
"""
|
||||||
func: Function = fh.inner
|
insn: DisassemblyInstruction = ih.inner
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
@@ -383,7 +388,7 @@ def extract_insn_nzxor_characteristic_features(
|
|||||||
else:
|
else:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
for llil in func.get_llils_at(ih.address):
|
for llil in insn.llil:
|
||||||
visit_llil_exprs(llil, llil_checker)
|
visit_llil_exprs(llil, llil_checker)
|
||||||
|
|
||||||
yield from results
|
yield from results
|
||||||
@@ -415,7 +420,7 @@ def extract_insn_peb_access_characteristic_features(
|
|||||||
|
|
||||||
fs:[0x30] on x86, gs:[0x60] on x64
|
fs:[0x30] on x86, gs:[0x60] on x64
|
||||||
"""
|
"""
|
||||||
func: Function = fh.inner
|
insn: DisassemblyInstruction = ih.inner
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
@@ -445,7 +450,7 @@ def extract_insn_peb_access_characteristic_features(
|
|||||||
results.append((Characteristic("peb access"), ih.address))
|
results.append((Characteristic("peb access"), ih.address))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
for llil in func.get_llils_at(ih.address):
|
for llil in insn.llil:
|
||||||
visit_llil_exprs(llil, llil_checker)
|
visit_llil_exprs(llil, llil_checker)
|
||||||
|
|
||||||
yield from results
|
yield from results
|
||||||
@@ -455,7 +460,7 @@ def extract_insn_segment_access_features(
|
|||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""parse instruction fs or gs access"""
|
"""parse instruction fs or gs access"""
|
||||||
func: Function = fh.inner
|
insn: DisassemblyInstruction = ih.inner
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
@@ -472,7 +477,7 @@ def extract_insn_segment_access_features(
|
|||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
for llil in func.get_llils_at(ih.address):
|
for llil in insn.llil:
|
||||||
visit_llil_exprs(llil, llil_checker)
|
visit_llil_exprs(llil, llil_checker)
|
||||||
|
|
||||||
yield from results
|
yield from results
|
||||||
@@ -500,47 +505,6 @@ def extract_insn_cross_section_cflow(
|
|||||||
yield Characteristic("cross section flow"), ih.address
|
yield Characteristic("cross section flow"), ih.address
|
||||||
|
|
||||||
|
|
||||||
def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
|
|
||||||
"""extract functions calls from features
|
|
||||||
|
|
||||||
most relevant at the function scope, however, its most efficient to extract at the instruction scope
|
|
||||||
"""
|
|
||||||
func: Function = fh.inner
|
|
||||||
bv: BinaryView = func.view
|
|
||||||
|
|
||||||
if bv is None:
|
|
||||||
return
|
|
||||||
|
|
||||||
for il in func.get_llils_at(ih.address):
|
|
||||||
if il.operation not in [
|
|
||||||
LowLevelILOperation.LLIL_CALL,
|
|
||||||
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
|
|
||||||
LowLevelILOperation.LLIL_TAILCALL,
|
|
||||||
]:
|
|
||||||
continue
|
|
||||||
|
|
||||||
dest = il.dest
|
|
||||||
if dest.operation == LowLevelILOperation.LLIL_CONST_PTR:
|
|
||||||
value = dest.value.value
|
|
||||||
yield Characteristic("calls from"), AbsoluteVirtualAddress(value)
|
|
||||||
elif dest.operation == LowLevelILOperation.LLIL_CONST:
|
|
||||||
yield Characteristic("calls from"), AbsoluteVirtualAddress(dest.value)
|
|
||||||
elif dest.operation == LowLevelILOperation.LLIL_LOAD:
|
|
||||||
indirect_src = dest.src
|
|
||||||
if indirect_src.operation == LowLevelILOperation.LLIL_CONST_PTR:
|
|
||||||
value = indirect_src.value.value
|
|
||||||
yield Characteristic("calls from"), AbsoluteVirtualAddress(value)
|
|
||||||
elif indirect_src.operation == LowLevelILOperation.LLIL_CONST:
|
|
||||||
yield Characteristic("calls from"), AbsoluteVirtualAddress(indirect_src.value)
|
|
||||||
elif dest.operation == LowLevelILOperation.LLIL_REG:
|
|
||||||
if dest.value.type in [
|
|
||||||
RegisterValueType.ImportedAddressValue,
|
|
||||||
RegisterValueType.ConstantValue,
|
|
||||||
RegisterValueType.ConstantPointerValue,
|
|
||||||
]:
|
|
||||||
yield Characteristic("calls from"), AbsoluteVirtualAddress(dest.value.value)
|
|
||||||
|
|
||||||
|
|
||||||
def extract_function_indirect_call_characteristic_features(
|
def extract_function_indirect_call_characteristic_features(
|
||||||
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
|
||||||
) -> Iterator[tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
@@ -550,14 +514,15 @@ def extract_function_indirect_call_characteristic_features(
|
|||||||
most relevant at the function or basic block scope;
|
most relevant at the function or basic block scope;
|
||||||
however, its most efficient to extract at the instruction scope
|
however, its most efficient to extract at the instruction scope
|
||||||
"""
|
"""
|
||||||
func: Function = fh.inner
|
insn: DisassemblyInstruction = ih.inner
|
||||||
|
|
||||||
llil = func.get_llil_at(ih.address)
|
if not insn.is_call:
|
||||||
if llil is None or llil.operation not in [
|
return
|
||||||
LowLevelILOperation.LLIL_CALL,
|
|
||||||
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
|
# TODO(williballenthin): when to use one vs many llil instructions
|
||||||
LowLevelILOperation.LLIL_TAILCALL,
|
# https://github.com/Vector35/binaryninja-api/issues/6205
|
||||||
]:
|
llil = insn.llil[0]
|
||||||
|
if not llil:
|
||||||
return
|
return
|
||||||
|
|
||||||
if llil.dest.operation in [LowLevelILOperation.LLIL_CONST, LowLevelILOperation.LLIL_CONST_PTR]:
|
if llil.dest.operation in [LowLevelILOperation.LLIL_CONST, LowLevelILOperation.LLIL_CONST_PTR]:
|
||||||
@@ -590,6 +555,5 @@ INSTRUCTION_HANDLERS = (
|
|||||||
extract_insn_peb_access_characteristic_features,
|
extract_insn_peb_access_characteristic_features,
|
||||||
extract_insn_cross_section_cflow,
|
extract_insn_cross_section_cflow,
|
||||||
extract_insn_segment_access_features,
|
extract_insn_segment_access_features,
|
||||||
extract_function_calls_from,
|
|
||||||
extract_function_indirect_call_characteristic_features,
|
extract_function_indirect_call_characteristic_features,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -239,6 +239,7 @@ def get_extractor(
|
|||||||
return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(input_path)
|
return capa.features.extractors.dnfile.extractor.DnfileFeatureExtractor(input_path)
|
||||||
|
|
||||||
elif backend == BACKEND_BINJA:
|
elif backend == BACKEND_BINJA:
|
||||||
|
import capa.perf as perf
|
||||||
import capa.features.extractors.binja.find_binja_api as finder
|
import capa.features.extractors.binja.find_binja_api as finder
|
||||||
|
|
||||||
if not finder.has_binaryninja():
|
if not finder.has_binaryninja():
|
||||||
@@ -262,9 +263,10 @@ def get_extractor(
|
|||||||
raise UnsupportedOSError()
|
raise UnsupportedOSError()
|
||||||
|
|
||||||
with console.status("analyzing program...", spinner="dots"):
|
with console.status("analyzing program...", spinner="dots"):
|
||||||
bv: binaryninja.BinaryView = binaryninja.load(str(input_path))
|
with perf.timing("binary ninja: loading program"):
|
||||||
if bv is None:
|
bv: binaryninja.BinaryView = binaryninja.load(str(input_path))
|
||||||
raise RuntimeError(f"Binary Ninja cannot open file {input_path}")
|
if bv is None:
|
||||||
|
raise RuntimeError(f"Binary Ninja cannot open file {input_path}")
|
||||||
|
|
||||||
return capa.features.extractors.binja.extractor.BinjaFeatureExtractor(bv)
|
return capa.features.extractors.binja.extractor.BinjaFeatureExtractor(bv)
|
||||||
|
|
||||||
|
|||||||
20
capa/perf.py
20
capa/perf.py
@@ -5,6 +5,9 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
import time
|
||||||
|
import inspect
|
||||||
|
import contextlib
|
||||||
import collections
|
import collections
|
||||||
|
|
||||||
# this structure is unstable and may change before the next major release.
|
# this structure is unstable and may change before the next major release.
|
||||||
@@ -14,3 +17,20 @@ counters: collections.Counter[str] = collections.Counter()
|
|||||||
def reset():
|
def reset():
|
||||||
global counters
|
global counters
|
||||||
counters = collections.Counter()
|
counters = collections.Counter()
|
||||||
|
|
||||||
|
|
||||||
|
@contextlib.contextmanager
|
||||||
|
def timing(msg: str):
|
||||||
|
"""log the given message start/stop and time taken, using the caller's `logger` instance."""
|
||||||
|
# stack:
|
||||||
|
# 0: here
|
||||||
|
# 1: contextlib
|
||||||
|
# 2: caller
|
||||||
|
caller = inspect.stack()[2]
|
||||||
|
caller_logger = caller.frame.f_globals.get("logger")
|
||||||
|
|
||||||
|
caller_logger.debug("%s...", msg)
|
||||||
|
t0 = time.time()
|
||||||
|
yield
|
||||||
|
t1 = time.time()
|
||||||
|
caller_logger.debug("%s done in %0.1fs.", msg, t1 - t0)
|
||||||
|
|||||||
Reference in New Issue
Block a user