# Origin:  commit 480df323e5153a7cb89403c2ef0c657f514b8d69
# Author:  William Ballenthin
# Date:    Mon, 8 Nov 2021 11:51:09 -0700
# Subject: [PATCH] scripts: add py script for profiling time
# Path:    scripts/profile-time.py
"""
Profile how long capa takes to find capabilities in a sample.

The script loads the rules and a feature extractor once, then calls
`capa.main.find_capabilities` `--number` x `--repeat` times under
`timeit.repeat`. It prints a one-row GitHub-flavoured markdown table with the
run label, the `evaluate.feature` counter, and the avg/min/max time per call.
"""
import sys
import timeit
import logging
import argparse
import subprocess

import tqdm
import tabulate

import capa.main
import capa.perf
import capa.rules
import capa.engine
import capa.helpers
import capa.features
import capa.features.common
import capa.features.freeze

logger = logging.getLogger("capa.profile")


def _run_git(*args):
    """Return the stdout of `git <args>`, or "" when git cannot be executed."""
    try:
        # argv list instead of a shell string: no dependency on a POSIX shell.
        return subprocess.run(["git", *args], capture_output=True, text=True).stdout
    except OSError:
        # git is not installed / not on PATH: fall back to an empty label
        # rather than aborting the profile run.
        return ""


def _default_label():
    """Build the default run label: abbreviated HEAD commit line, plus " (dirty)" if needed."""
    # --no-patch limits the output to the one-line commit summary.
    summary = _run_git("show", "--no-patch", "--pretty=oneline", "--abbrev-commit").splitlines()
    label = summary[0].strip() if summary else ""

    # The tree counts as dirty when `git status` reports a modified path that
    # mentions neither "rules" nor "tests/data".
    is_dirty = any(
        "modified: " in line and "rules" not in line and "tests/data" not in line
        for line in _run_git("status").splitlines()
    )
    if is_dirty:
        label += " (dirty)"
    return label


def _summarize(samples, number):
    """
    Convert per-batch timings into per-call statistics.

    Args:
      samples: list of batch durations in seconds, as returned by `timeit.repeat`.
      number: how many calls each batch contains.

    Returns:
      tuple (average, minimum, maximum) of seconds per single call.
    """
    calls_per_batch = float(number)
    average = sum(samples) / float(len(samples)) / calls_per_batch
    return average, min(samples) / calls_per_batch, max(samples) / calls_per_batch


def main(argv=None):
    """
    Run the profile and print the result table.

    Args:
      argv: command line arguments without the program name;
        defaults to `sys.argv[1:]`.

    Returns:
      0 on success, -1 when the sample, rules, or signatures cannot be read.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="Profile capa performance")
    capa.main.install_common_args(parser, wanted={"format", "sample", "signatures", "rules"})

    parser.add_argument("--number", type=int, default=3, help="batch size of profile collection")
    parser.add_argument("--repeat", type=int, default=30, help="batch count of profile collection")
    parser.add_argument("--label", type=str, default=_default_label(), help="description of the profile collection")

    args = parser.parse_args(args=argv)
    if args.number < 1 or args.repeat < 1:
        # Otherwise the statistics below divide by zero or take min() of an empty list.
        parser.error("--number and --repeat must be positive integers")
    capa.main.handle_common_args(args)

    try:
        taste = capa.helpers.get_file_taste(args.sample)
    except IOError as e:
        logger.error("%s", e)
        return -1

    try:
        with capa.main.timing("load rules"):
            rules = capa.rules.RuleSet(capa.main.get_rules(args.rules, disable_progress=True))
    except IOError as e:
        logger.error("%s", e)
        return -1

    try:
        sig_paths = capa.main.get_signatures(args.signatures)
    except IOError as e:
        logger.error("%s", e)
        return -1

    if (args.format == "freeze") or (args.format == "auto" and capa.features.freeze.is_freeze(taste)):
        with open(args.sample, "rb") as f:
            extractor = capa.features.freeze.load(f.read())
    else:
        extractor = capa.main.get_extractor(
            args.sample, args.format, capa.main.BACKEND_VIV, sig_paths, should_save_workspace=False
        )

    with tqdm.tqdm(total=args.number * args.repeat) as pbar:

        def do_iteration():
            # Reset before every call, so the counters read below
            # presumably reflect only the final iteration.
            capa.perf.reset()
            capa.main.find_capabilities(rules, extractor, disable_progress=True)
            pbar.update(1)

        samples = timeit.repeat(do_iteration, number=args.number, repeat=args.repeat)

    average, fastest, slowest = _summarize(samples, args.number)

    logger.debug("perf: find capabilities: min: %0.2fs", fastest)
    logger.debug("perf: find capabilities: avg: %0.2fs", average)
    logger.debug("perf: find capabilities: max: %0.2fs", slowest)

    for counter, count in capa.perf.counters.most_common():
        logger.debug("perf: counter: %s: %s", counter, "{:,}".format(count))

    print(
        tabulate.tabulate(
            [
                (
                    args.label,
                    "{:,}".format(capa.perf.counters["evaluate.feature"]),
                    "%0.2fs" % average,
                    "%0.2fs" % fastest,
                    "%0.2fs" % slowest,
                )
            ],
            headers=["label", "count(evaluations)", "avg(time)", "min(time)", "max(time)"],
            tablefmt="github",
        )
    )

    return 0


if __name__ == "__main__":
    sys.exit(main())