Compare commits

...

4 Commits

Author SHA1 Message Date
mr-tz
304f835a1a initial commit of out-of-the box flirt-based library id 2024-10-10 16:06:55 +00:00
Moritz
1f7f24c467 Merge pull request #2454 from mandiant/fix/ida9idalib
Fix IDA 9.0 / idalib
2024-10-09 18:04:23 +02:00
mr-tz
f2c329b768 rename ida to idapro module for IDA 9.0 2024-10-09 12:20:38 +00:00
mr-tz
22368fbe6f rename bin_search function 2024-10-09 12:13:11 +00:00
6 changed files with 177 additions and 8 deletions

View File

@@ -13,6 +13,7 @@
### Bug Fixes
- extractor: fix exception when PE extractor encounters unknown architecture #2440 @Tamir-K
- IDA Pro: rename ida to idapro module for plugin and idalib in IDA 9.0 #2453 @mr-tz
### capa Explorer Web

164
capa/analysis/flirt.py Normal file
View File

@@ -0,0 +1,164 @@
import io
import sys
import time
import logging
import argparse
from pathlib import Path
import rich
from rich.console import Console
from rich.logging import RichHandler
import capa.helpers
import capa.features.extractors.ida.idalib as idalib
if not idalib.has_idalib():
raise RuntimeError("cannot find IDA idalib module.")
if not idalib.load_idalib():
raise RuntimeError("failed to load IDA idalib module.")
import idaapi
import idapro
import ida_auto
import idautils
import ida_funcs
logger = logging.getLogger(__name__)
from pydantic import BaseModel
def colorbool(v: bool) -> str:
    """Render ``v`` as text wrapped in Rich markup: green when truthy, red otherwise."""
    color = "green" if v else "red"
    return f"[{color}]{v!s}[/{color}]"
def colorname(n: str) -> str:
    """
    Wrap a function name in cyan Rich markup.

    Names starting with ``sub_`` are returned unchanged
    (presumably these are IDA's default names for unnamed functions).
    """
    return n if n.startswith("sub_") else f"[cyan]{n}[/cyan]"
class FunctionId(BaseModel):
    """Record describing one function: where it is, what it is called, and its library/thunk flags."""

    # address of the function
    address: int
    # True when the function is flagged as a library function
    is_library: bool
    # True when the function is flagged as a thunk
    is_thunk: bool
    # name of the function
    name: str

    def to_row(self):
        """Return the record as display strings: hex address, colored flags, colored name."""
        return [
            hex(self.address),
            colorbool(self.is_library),
            colorbool(self.is_thunk),
            colorname(self.name),
        ]
def configure_logging(args):
    """
    Configure the root logger and the verbosity of the capa/viv_utils loggers.

    Args:
        args: parsed command line arguments; only the boolean attributes
          ``quiet`` and ``debug`` are read. ``quiet`` takes precedence
          over ``debug`` for the root logger level.
    """
    root_logger = logging.getLogger()

    if args.quiet:
        root_level = logging.WARNING
    elif args.debug:
        root_level = logging.DEBUG
    else:
        root_level = logging.INFO
    root_logger.setLevel(root_level)

    # markup=True lets log messages use Rich's markup syntax.
    handler = RichHandler(markup=True, show_time=False, show_path=True, console=capa.helpers.log_console)
    # the [/] after the logger name resets any styling,
    # so the dim color does not carry over into the message.
    handler.setFormatter(logging.Formatter("[dim]%(name)s[/]: %(message)s"))
    # route everything logged via the root logger through the RichHandler
    root_logger.addHandler(handler)

    # these loggers are only verbose when debugging; otherwise just errors.
    library_level = logging.DEBUG if args.debug else logging.ERROR
    for logger_name in ("capa", "viv_utils"):
        logging.getLogger(logger_name).setLevel(library_level)
def main(argv=None):
    """
    Identify library functions in a file using FLIRT, via IDA's idalib.

    Opens the input file with idalib, waits for auto-analysis to finish,
    prints a table of functions flagged as library code (the listing stops
    after 51 rows), and then prints each entry reported by
    `ida_funcs.get_idasgn_desc_with_matches`.

    Args:
        argv: command line arguments without the program name;
          defaults to ``sys.argv[1:]``.

    Returns:
        None.

    Raises:
        RuntimeError: if `idapro.open_database` returns a truthy value,
          which is treated as a failure to open/analyze the input file.
    """
    # imported explicitly: `import rich` on its own does not guarantee that
    # the `rich.table` submodule has been loaded.
    from rich.table import Table

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="Identify library functions using FLIRT.")
    parser.add_argument(
        "input_file",
        type=Path,
        help="path to file to analyze",
    )
    parser.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
    parser.add_argument("-q", "--quiet", action="store_true", help="disable all output but errors")
    args = parser.parse_args(args=argv)

    configure_logging(args)

    time0 = time.time()

    # stderr=True is used here to redirect the spinner banner to stderr, so that users can redirect capa's output.
    console = Console(stderr=True, quiet=False)

    # only close the database if it was actually opened,
    # but then always close it, even when a later step raises.
    database_open = False
    try:
        logger.debug("idalib: opening database...")
        # idalib writes to stdout (ugh), so we have to capture that
        # so as not to screw up structured output.
        with capa.helpers.stdout_redirector(io.BytesIO()):
            with console.status("analyzing program...", spinner="dots"):
                if idapro.open_database(str(args.input_file), run_auto_analysis=True):
                    raise RuntimeError("failed to analyze input file")
                database_open = True

                logger.debug("idalib: waiting for analysis...")
                # TODO: add more signature (files)
                # TODO: apply more signatures
                ida_auto.auto_wait()
                logger.debug("idalib: opened database.")

        table = Table()
        table.add_column("FVA")
        table.add_column("library?")
        table.add_column("thunk?")
        table.add_column("name")

        LIBONLY = True
        count = 0
        for ea in idautils.Functions(start=None, end=None):
            f = idaapi.get_func(ea)
            is_lib = bool(f.flags & idaapi.FUNC_LIB)
            if LIBONLY and not is_lib:
                # skip before doing any further lookups for this function
                continue

            is_thunk = bool(f.flags & idaapi.FUNC_THUNK)
            fname = idaapi.get_func_name(ea)

            fid = FunctionId(address=ea, is_library=is_lib, is_thunk=is_thunk, name=fname)
            table.add_row(*fid.to_row())

            # cap the size of the listing
            count += 1
            if count > 50:
                break

        rich.print(table)

        # TODO can we include which signature matched per function?
        for index in range(ida_funcs.get_idasgn_qty()):
            signame, optlibs, nmatches = ida_funcs.get_idasgn_desc_with_matches(index)
            rich.print(signame, optlibs, nmatches)
    finally:
        if database_open:
            idapro.close_database()

    # named to avoid shadowing the builtin `min`
    elapsed_min, elapsed_sec = divmod(time.time() - time0, 60)
    logger.debug("FLIRT-based library identification ran for ~ %02d:%02dm", elapsed_min, elapsed_sec)
# script entry point: run main() and use its return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -41,7 +41,7 @@ if hasattr(ida_bytes, "parse_binpat_str"):
return
while True:
ea, _ = ida_bytes.bin_search3(start, end, patterns, ida_bytes.BIN_SEARCH_FORWARD)
ea, _ = ida_bytes.bin_search(start, end, patterns, ida_bytes.BIN_SEARCH_FORWARD)
if ea == idaapi.BADADDR:
break
start = ea + 1

View File

@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
def is_idalib_installed() -> bool:
try:
return importlib.util.find_spec("ida") is not None
return importlib.util.find_spec("idapro") is not None
except ModuleNotFoundError:
return False
@@ -44,6 +44,7 @@ def get_idalib_user_config_path() -> Optional[Path]:
def find_idalib() -> Optional[Path]:
config_path = get_idalib_user_config_path()
if not config_path:
logger.error("IDA Pro user configuration does not exist, please make sure you've installed idalib properly.")
return None
config = json.loads(config_path.read_text(encoding="utf-8"))
@@ -51,6 +52,9 @@ def find_idalib() -> Optional[Path]:
try:
ida_install_dir = Path(config["Paths"]["ida-install-dir"])
except KeyError:
logger.error(
"IDA Pro user configuration does not contain location of IDA Pro installation, please make sure you've installed idalib properly."
)
return None
if not ida_install_dir.exists():
@@ -73,7 +77,7 @@ def find_idalib() -> Optional[Path]:
if not idalib_path.exists():
return None
if not (idalib_path / "ida" / "__init__.py").is_file():
if not (idalib_path / "idapro" / "__init__.py").is_file():
return None
return idalib_path
@@ -96,7 +100,7 @@ def has_idalib() -> bool:
def load_idalib() -> bool:
try:
import ida
import idapro
return True
except ImportError:
@@ -106,7 +110,7 @@ def load_idalib() -> bool:
sys.path.append(idalib_path.absolute().as_posix())
try:
import ida # noqa: F401 unused import
import idapro # noqa: F401 unused import
return True
except ImportError:

View File

@@ -323,7 +323,7 @@ def get_extractor(
if not idalib.load_idalib():
raise RuntimeError("failed to load IDA idalib module.")
import ida
import idapro
import ida_auto
import capa.features.extractors.ida.extractor
@@ -333,7 +333,7 @@ def get_extractor(
# so as not to screw up structured output.
with capa.helpers.stdout_redirector(io.BytesIO()):
with console.status("analyzing program...", spinner="dots"):
if ida.open_database(str(input_path), run_auto_analysis=True):
if idapro.open_database(str(input_path), run_auto_analysis=True):
raise RuntimeError("failed to analyze input file")
logger.debug("idalib: waiting for analysis...")

View File

@@ -177,7 +177,7 @@ known_first_party = [
"binaryninja",
"flirt",
"ghidra",
"ida",
"idapro",
"ida_ida",
"ida_auto",
"ida_bytes",