replace tqdm, termcolor, tabulate with rich (#2374)

* logging: use rich handler for logging
* tqdm: remove unneeded redirecting_print_to_tqdm function
* tqdm: introduce `CapaProgressBar` rich `Progress` bar
* tqdm: replace tqdm with rich Progress bar
* tqdm: remove tqdm dependency
* termcolor: replace termcolor and update `scripts/`
* tests: update `test_render.py` to use rich.console.Console
* termcolor: remove termcolor dependency
* capa.render.utils: add `write` & `writeln` methods to subclass `Console`
* update markup util functions to use fmt strings
* tests: update `test_render.py` to use `capa.render.utils.Console`
* replace kwarg `end=""` with `write` and `writeln` methods
* tabulate: replace tabulate with `rich.table`
* tabulate: remove `tabulate` and its dependency `wcwidth`
* logging: handle logging in `capa.main`
* logging: set up logging in `capa.main`
  this commit sets up logging in `capa.main` and uses a shared `log_console` in `capa.helpers` for logging purposes
* changelog: replace packages with rich
* remove entry from pyinstaller and unneeded progress.update call
* update requirements.txt
* scripts: use `capa.helpers.log_console` in `CapaProgressBar`
* logging: configure root logger to use `RichHandler`
* remove unused import `inspect`
2025-12-12 07:40:38 -08:00 · 2024-09-27 09:34:21 +02:00
parent 558bf0fbf2
commit 51a4eb46b8
16 changed files with 890 additions and 806 deletions
--- a/scripts/lint.py
+++ b/scripts/lint.py
@@ -31,11 +31,9 @@ from typing import Set, Dict, List
 from pathlib import Path
 from dataclasses import field, dataclass

-import tqdm
 import pydantic
-import termcolor
 import ruamel.yaml
-import tqdm.contrib.logging
+from rich import print

 import capa.main
 import capa.rules
@@ -51,18 +49,6 @@ from capa.render.result_document import RuleMetadata
 logger = logging.getLogger("lint")


-def red(s):
-    return termcolor.colored(s, "red")
-
-
-def orange(s):
-    return termcolor.colored(s, "yellow")
-
-
-def green(s):
-    return termcolor.colored(s, "green")
-
-
@dataclass
 class Context:
    """
@@ -80,8 +66,8 @@ class Context:


 class Lint:
-    WARN = orange("WARN")
-    FAIL = red("FAIL")
+    WARN = "[yellow]WARN[/yellow]"
+    FAIL = "[red]FAIL[/red]"

    name = "lint"
    level = FAIL
@@ -896,7 +882,7 @@ def lint_rule(ctx: Context, rule: Rule):
        if (not lints_failed) and (not lints_warned) and has_examples:
            print("")
            print(f'{"    (nursery) " if is_nursery_rule(rule) else ""} {rule.name}')
-            print(f"      {Lint.WARN}: {green('no lint failures')}: Graduate the rule")
+            print(f"      {Lint.WARN}: '[green]no lint failures[/green]': Graduate the rule")
            print("")
    else:
        lints_failed = len(tuple(filter(lambda v: v.level == Lint.FAIL, violations)))
@@ -921,12 +907,15 @@ def lint(ctx: Context):
    ret = {}

    source_rules = [rule for rule in ctx.rules.rules.values() if not rule.is_subscope_rule()]
-    with tqdm.contrib.logging.tqdm_logging_redirect(source_rules, unit="rule", leave=False) as pbar:
-        with capa.helpers.redirecting_print_to_tqdm(False):
-            for rule in pbar:
-                name = rule.name
-                pbar.set_description(width(f"linting rule: {name}", 48))
-                ret[name] = lint_rule(ctx, rule)
+    n_rules: int = len(source_rules)
+
+    with capa.helpers.CapaProgressBar(transient=True, console=capa.helpers.log_console) as pbar:
+        task = pbar.add_task(description="linting", total=n_rules, unit="rule")
+        for rule in source_rules:
+            name = rule.name
+            pbar.update(task, description=width(f"linting rule: {name}", 48))
+            ret[name] = lint_rule(ctx, rule)
+            pbar.advance(task)

    return ret

@@ -1020,18 +1009,18 @@ def main(argv=None):
    logger.debug("lints ran for ~ %02d:%02dm", min, sec)

    if warned_rules:
-        print(orange("rules with WARN:"))
+        print("[yellow]rules with WARN:[/yellow]")
        for warned_rule in sorted(warned_rules):
            print("  - " + warned_rule)
        print()

    if failed_rules:
-        print(red("rules with FAIL:"))
+        print("[red]rules with FAIL:[/red]")
        for failed_rule in sorted(failed_rules):
            print("  - " + failed_rule)
        return 1
    else:
-        logger.info(green("no lints failed, nice!"))
+        logger.info("[green]no lints failed, nice![/green]")
        return 0


--- a/scripts/profile-time.py
+++ b/scripts/profile-time.py
@@ -42,9 +42,10 @@ import logging
 import argparse
 import subprocess

-import tqdm
 import humanize
-import tabulate
+from rich import box
+from rich.table import Table
+from rich.console import Console

 import capa.main
 import capa.perf
@@ -92,51 +93,61 @@ def main(argv=None):
    except capa.main.ShouldExitError as e:
        return e.status_code

-    with tqdm.tqdm(total=args.number * args.repeat, leave=False) as pbar:
+    with capa.helpers.CapaProgressBar(console=capa.helpers.log_console) as progress:
+        total_iterations = args.number * args.repeat
+        task = progress.add_task("profiling", total=total_iterations)

        def do_iteration():
            capa.perf.reset()
            capa.capabilities.common.find_capabilities(rules, extractor, disable_progress=True)
-            pbar.update(1)
+
+            progress.advance(task)

        samples = timeit.repeat(do_iteration, number=args.number, repeat=args.repeat)

    logger.debug("perf: find capabilities: min: %0.2fs", (min(samples) / float(args.number)))
-    logger.debug("perf: find capabilities: avg: %0.2fs", (sum(samples) / float(args.repeat) / float(args.number)))
+    logger.debug(
+        "perf: find capabilities: avg: %0.2fs",
+        (sum(samples) / float(args.repeat) / float(args.number)),
+    )
    logger.debug("perf: find capabilities: max: %0.2fs", (max(samples) / float(args.number)))

    for counter, count in capa.perf.counters.most_common():
        logger.debug("perf: counter: %s: %s", counter, count)

-    print(
-        tabulate.tabulate(
-            [(counter, humanize.intcomma(count)) for counter, count in capa.perf.counters.most_common()],
-            headers=["feature class", "evaluation count"],
-            tablefmt="github",
-        )
-    )
-    print()
+    console = Console()

-    print(
-        tabulate.tabulate(
-            [
-                (
-                    args.label,
-                    "{:,}".format(capa.perf.counters["evaluate.feature"]),
-                    # python documentation indicates that min(samples) should be preferred,
-                    # so lets put that first.
-                    #
-                    # https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat
-                    f"{(min(samples) / float(args.number)):.2f}s",
-                    f"{(sum(samples) / float(args.repeat) / float(args.number)):.2f}s",
-                    f"{(max(samples) / float(args.number)):.2f}s",
-                )
-            ],
-            headers=["label", "count(evaluations)", "min(time)", "avg(time)", "max(time)"],
-            tablefmt="github",
-        )
+    table1 = Table(box=box.MARKDOWN)
+    table1.add_column("feature class")
+    table1.add_column("evaluation count")
+
+    for counter, count in capa.perf.counters.most_common():
+        table1.add_row(counter, humanize.intcomma(count))
+
+    console.print(table1)
+    console.print()
+
+    table2 = Table(box=box.MARKDOWN)
+    table2.add_column("label")
+    table2.add_column("count(evaluations)", style="magenta")
+    table2.add_column("min(time)", style="green")
+    table2.add_column("avg(time)", style="yellow")
+    table2.add_column("max(time)", style="red")
+
+    table2.add_row(
+        args.label,
+        # python documentation indicates that min(samples) should be preferred,
+        # so lets put that first.
+        #
+        # https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat
+        "{:,}".format(capa.perf.counters["evaluate.feature"]),
+        f"{(min(samples) / float(args.number)):.2f}s",
+        f"{(sum(samples) / float(args.repeat) / float(args.number)):.2f}s",
+        f"{(max(samples) / float(args.number)):.2f}s",
    )

+    console.print(table2)
+
    return 0


--- a/scripts/show-unused-features.py
+++ b/scripts/show-unused-features.py
@@ -12,11 +12,12 @@ import sys
 import typing
 import logging
 import argparse
-from typing import Set, Tuple
+from typing import Set, List, Tuple
 from collections import Counter

-import tabulate
-from termcolor import colored
+from rich import print
+from rich.text import Text
+from rich.table import Table

 import capa.main
 import capa.rules
@@ -77,23 +78,30 @@ def get_file_features(
    return feature_map


-def get_colored(s: str):
+def get_colored(s: str) -> Text:
    if "(" in s and ")" in s:
        s_split = s.split("(", 1)
-        s_color = colored(s_split[1][:-1], "cyan")
-        return f"{s_split[0]}({s_color})"
+        return Text.assemble(s_split[0], "(", (s_split[1][:-1], "cyan"), ")")
    else:
-        return colored(s, "cyan")
+        return Text(s, style="cyan")


 def print_unused_features(feature_map: typing.Counter[Feature], rules_feature_set: Set[Feature]):
-    unused_features = []
+    unused_features: List[Tuple[str, Text]] = []
    for feature, count in reversed(feature_map.most_common()):
        if feature in rules_feature_set:
            continue
        unused_features.append((str(count), get_colored(str(feature))))
+
+    table = Table(title="Unused Features", box=None)
+    table.add_column("Count", style="dim")
+    table.add_column("Feature")
+
+    for count_str, feature_text in unused_features:
+        table.add_row(count_str, feature_text)
+
    print("\n")
-    print(tabulate.tabulate(unused_features, headers=["Count", "Feature"], tablefmt="plain"))
+    print(table)
    print("\n")