mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 07:40:38 -08:00
add call graph based id of functions called by library code
This commit is contained in:
@@ -12,7 +12,7 @@ import argparse
|
||||
import tempfile
|
||||
import contextlib
|
||||
from enum import Enum
|
||||
from typing import List, Optional
|
||||
from typing import List, Iterable, Optional
|
||||
from pathlib import Path
|
||||
|
||||
import rich
|
||||
@@ -51,6 +51,7 @@ class Method(str, Enum):
|
||||
STRINGS = "strings"
|
||||
THUNK = "thunk"
|
||||
ENTRYPOINT = "entrypoint"
|
||||
CALLGRAPH = "callgraph"
|
||||
|
||||
|
||||
class FunctionClassification(BaseModel):
|
||||
@@ -69,6 +70,9 @@ class FunctionClassification(BaseModel):
|
||||
library_name: Optional[str] = None
|
||||
library_version: Optional[str] = None
|
||||
|
||||
# additional note on the classification, TODO removeme if not useful beyond dev/debug
|
||||
note: Optional[str] = None
|
||||
|
||||
|
||||
class FunctionIdResults(BaseModel):
|
||||
function_classifications: List[FunctionClassification]
|
||||
@@ -110,6 +114,54 @@ def ida_session(input_path: Path, use_temp_dir=True):
|
||||
t.unlink()
|
||||
|
||||
|
||||
def get_library_called_functions(
    function_classifications: list[FunctionClassification],
) -> Iterable[FunctionClassification]:
    """Yield call-graph based library classifications for unclassified functions.

    A function without any classification that is referenced from code inside a
    function already classified as library code is reported as library code
    itself (method ``CALLGRAPH``), inheriting the caller's library name and
    version. Newly found classifications are added to the internal lookup, so
    results propagate along call chains. The scan is repeated until a pass finds
    nothing new, or until ``MAX_PASSES`` passes have run.

    Args:
        function_classifications: classifications known so far; used to build a
            lookup keyed by ``va`` via ``capa.analysis.strings.create_index``.

    Yields:
        At most one new ``FunctionClassification`` per pair of (unclassified
        function, distinct library-classified caller function).
    """
    MAX_PASSES = 10
    # library functions where we know that they call user-code; being called by
    # one of these says nothing about the callee
    # TODO(mr): extend this list
    IGNORED_CALLERS = ("___tmainCRTStartup",)

    classifications_by_va = capa.analysis.strings.create_index(function_classifications, "va")
    for n in range(MAX_PASSES):
        found_new_lib_func = False

        for fva in idautils.Functions():
            if classifications_by_va.get(fva):
                # already classified
                continue

            # CodeRefsTo reports every referencing location, so a caller with
            # several call sites would otherwise produce identical records
            seen_callers = set()
            for ref in idautils.CodeRefsTo(fva, True):
                f: idaapi.func_t = idaapi.get_func(ref)
                if not f:
                    # no function associated with reference location
                    continue

                ref_fva = f.start_ea
                if ref_fva in seen_callers:
                    continue
                seen_callers.add(ref_fva)

                if idaapi.get_func_name(ref_fva) in IGNORED_CALLERS:
                    continue

                for c in classifications_by_va.get(ref_fva) or ():
                    if c.classification != Classification.LIBRARY:
                        continue

                    fc = FunctionClassification(
                        va=fva,
                        name=idaapi.get_func_name(fva),
                        classification=Classification.LIBRARY,
                        method=Method.CALLGRAPH,
                        library_name=c.library_name,
                        library_version=c.library_version,
                        note=f"called by 0x{ref_fva:x} ({c.method.value})",
                    )
                    # record it so functions called by `fva` can be resolved too
                    classifications_by_va[fva].append(fc)
                    yield fc
                    found_new_lib_func = True
                    # one record per caller is enough
                    break

        if not found_new_lib_func:
            logger.debug("no update in pass %d, done here", n)
            return

    # every pass still found something new; results may be incomplete
    logger.debug("reached limit of %d passes, stopping", MAX_PASSES)
|
||||
|
||||
|
||||
def is_thunk_function(fva):
    """Return True if the function IDA reports for `fva` has the FUNC_THUNK flag set.

    Returns False when IDA has no function associated with `fva`.
    """
    f = idaapi.get_func(fva)
    if not f:
        # no function associated with this address, so there are no flags to inspect
        return False
    return bool(f.flags & idaapi.FUNC_THUNK)
|
||||
@@ -177,7 +229,11 @@ def main(argv=None):
|
||||
|
||||
for va in idautils.Functions():
|
||||
name = idaapi.get_func_name(va)
|
||||
if name not in {"WinMain", }:
|
||||
if name not in {
|
||||
"WinMain",
|
||||
"_main",
|
||||
"main",
|
||||
}:
|
||||
continue
|
||||
|
||||
function_classifications.append(
|
||||
@@ -189,6 +245,10 @@ def main(argv=None):
|
||||
)
|
||||
)
|
||||
|
||||
with capa.main.timing("call graph based library identification"):
|
||||
for fc in get_library_called_functions(function_classifications):
|
||||
function_classifications.append(fc)
|
||||
|
||||
doc = FunctionIdResults(function_classifications=[])
|
||||
classifications_by_va = capa.analysis.strings.create_index(function_classifications, "va")
|
||||
for va in idautils.Functions():
|
||||
@@ -217,6 +277,7 @@ def main(argv=None):
|
||||
|
||||
classifications_by_va = capa.analysis.strings.create_index(doc.function_classifications, "va", sorted_=True)
|
||||
for va, classifications in classifications_by_va.items():
|
||||
# TODO count of classifications if multiple?
|
||||
name = ", ".join({c.name for c in classifications})
|
||||
if "sub_" in name:
|
||||
name = Text(name, style="grey53")
|
||||
@@ -224,13 +285,14 @@ def main(argv=None):
|
||||
classification = {c.classification for c in classifications}
|
||||
method = {c.method for c in classifications if c.method}
|
||||
extra = {f"{c.library_name}@{c.library_version}" for c in classifications if c.library_name}
|
||||
note = {f"{c.note}" for c in classifications if c.note}
|
||||
|
||||
table.add_row(
|
||||
hex(va),
|
||||
", ".join(classification) if classification != {"unknown"} else Text("unknown", style="grey53"),
|
||||
", ".join(method),
|
||||
name,
|
||||
", ".join(extra),
|
||||
f"{', '.join(extra)} {', '.join(note)}",
|
||||
)
|
||||
|
||||
rich.print(table)
|
||||
|
||||
Reference in New Issue
Block a user