replace tqdm, termcolor, tabulate with rich (#2374)

* logging: use rich handler for logging

* tqdm: remove unneeded redirecting_print_to_tqdm function

* tqdm: introduce `CapaProgressBar` rich `Progress` bar

* tqdm: replace tqdm with rich Progress bar

* tqdm: remove tqdm dependency

* termcolor: replace termcolor and update `scripts/`

* tests: update `test_render.py` to use rich.console.Console

* termcolor: remove termcolor dependency

* capa.render.utils: add `write` & `writeln` methods to subclass `Console`

* update markup util functions to use fmt strings

* tests: update `test_render.py` to use `capa.render.utils.Console`

* replace kwarg `end=""` with `write` and `writeln` methods

* tabulate: replace tabulate with `rich.table`

* tabulate: remove `tabulate` and its dependency `wcwidth`

* logging: handle logging in `capa.main`

* logging: set up logging in `capa.main`

this commit sets up logging in `capa.main` and uses a shared
`log_console` in `capa.helpers` for logging purposes

* changelog: replace packages with rich

* remove entry from pyinstaller and unneeded progress.update call

* update requirements.txt

* scripts: use `capa.helpers.log_console` in `CapaProgressBar`

* logging: configure root logger to use `RichHandler`

* remove unused import `inspect`
This commit is contained in:
Fariss
2024-09-27 09:34:21 +02:00
committed by GitHub
parent 558bf0fbf2
commit 51a4eb46b8
16 changed files with 890 additions and 806 deletions

View File

@@ -31,11 +31,9 @@ from typing import Set, Dict, List
from pathlib import Path
from dataclasses import field, dataclass
import tqdm
import pydantic
import termcolor
import ruamel.yaml
import tqdm.contrib.logging
from rich import print
import capa.main
import capa.rules
@@ -51,18 +49,6 @@ from capa.render.result_document import RuleMetadata
logger = logging.getLogger("lint")
def red(s):
return termcolor.colored(s, "red")
def orange(s):
return termcolor.colored(s, "yellow")
def green(s):
return termcolor.colored(s, "green")
@dataclass
class Context:
"""
@@ -80,8 +66,8 @@ class Context:
class Lint:
WARN = orange("WARN")
FAIL = red("FAIL")
WARN = "[yellow]WARN[/yellow]"
FAIL = "[red]FAIL[/red]"
name = "lint"
level = FAIL
@@ -896,7 +882,7 @@ def lint_rule(ctx: Context, rule: Rule):
if (not lints_failed) and (not lints_warned) and has_examples:
print("")
print(f'{" (nursery) " if is_nursery_rule(rule) else ""} {rule.name}')
print(f" {Lint.WARN}: {green('no lint failures')}: Graduate the rule")
print(f" {Lint.WARN}: '[green]no lint failures[/green]': Graduate the rule")
print("")
else:
lints_failed = len(tuple(filter(lambda v: v.level == Lint.FAIL, violations)))
@@ -921,12 +907,15 @@ def lint(ctx: Context):
ret = {}
source_rules = [rule for rule in ctx.rules.rules.values() if not rule.is_subscope_rule()]
with tqdm.contrib.logging.tqdm_logging_redirect(source_rules, unit="rule", leave=False) as pbar:
with capa.helpers.redirecting_print_to_tqdm(False):
for rule in pbar:
name = rule.name
pbar.set_description(width(f"linting rule: {name}", 48))
ret[name] = lint_rule(ctx, rule)
n_rules: int = len(source_rules)
with capa.helpers.CapaProgressBar(transient=True, console=capa.helpers.log_console) as pbar:
task = pbar.add_task(description="linting", total=n_rules, unit="rule")
for rule in source_rules:
name = rule.name
pbar.update(task, description=width(f"linting rule: {name}", 48))
ret[name] = lint_rule(ctx, rule)
pbar.advance(task)
return ret
@@ -1020,18 +1009,18 @@ def main(argv=None):
logger.debug("lints ran for ~ %02d:%02dm", min, sec)
if warned_rules:
print(orange("rules with WARN:"))
print("[yellow]rules with WARN:[/yellow]")
for warned_rule in sorted(warned_rules):
print(" - " + warned_rule)
print()
if failed_rules:
print(red("rules with FAIL:"))
print("[red]rules with FAIL:[/red]")
for failed_rule in sorted(failed_rules):
print(" - " + failed_rule)
return 1
else:
logger.info(green("no lints failed, nice!"))
logger.info("[green]no lints failed, nice![/green]")
return 0

View File

@@ -42,9 +42,10 @@ import logging
import argparse
import subprocess
import tqdm
import humanize
import tabulate
from rich import box
from rich.table import Table
from rich.console import Console
import capa.main
import capa.perf
@@ -92,51 +93,61 @@ def main(argv=None):
except capa.main.ShouldExitError as e:
return e.status_code
with tqdm.tqdm(total=args.number * args.repeat, leave=False) as pbar:
with capa.helpers.CapaProgressBar(console=capa.helpers.log_console) as progress:
total_iterations = args.number * args.repeat
task = progress.add_task("profiling", total=total_iterations)
def do_iteration():
capa.perf.reset()
capa.capabilities.common.find_capabilities(rules, extractor, disable_progress=True)
pbar.update(1)
progress.advance(task)
samples = timeit.repeat(do_iteration, number=args.number, repeat=args.repeat)
logger.debug("perf: find capabilities: min: %0.2fs", (min(samples) / float(args.number)))
logger.debug("perf: find capabilities: avg: %0.2fs", (sum(samples) / float(args.repeat) / float(args.number)))
logger.debug(
"perf: find capabilities: avg: %0.2fs",
(sum(samples) / float(args.repeat) / float(args.number)),
)
logger.debug("perf: find capabilities: max: %0.2fs", (max(samples) / float(args.number)))
for counter, count in capa.perf.counters.most_common():
logger.debug("perf: counter: %s: %s", counter, count)
print(
tabulate.tabulate(
[(counter, humanize.intcomma(count)) for counter, count in capa.perf.counters.most_common()],
headers=["feature class", "evaluation count"],
tablefmt="github",
)
)
print()
console = Console()
print(
tabulate.tabulate(
[
(
args.label,
"{:,}".format(capa.perf.counters["evaluate.feature"]),
# python documentation indicates that min(samples) should be preferred,
# so lets put that first.
#
# https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat
f"{(min(samples) / float(args.number)):.2f}s",
f"{(sum(samples) / float(args.repeat) / float(args.number)):.2f}s",
f"{(max(samples) / float(args.number)):.2f}s",
)
],
headers=["label", "count(evaluations)", "min(time)", "avg(time)", "max(time)"],
tablefmt="github",
)
table1 = Table(box=box.MARKDOWN)
table1.add_column("feature class")
table1.add_column("evaluation count")
for counter, count in capa.perf.counters.most_common():
table1.add_row(counter, humanize.intcomma(count))
console.print(table1)
console.print()
table2 = Table(box=box.MARKDOWN)
table2.add_column("label")
table2.add_column("count(evaluations)", style="magenta")
table2.add_column("min(time)", style="green")
table2.add_column("avg(time)", style="yellow")
table2.add_column("max(time)", style="red")
table2.add_row(
args.label,
# python documentation indicates that min(samples) should be preferred,
# so lets put that first.
#
# https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat
"{:,}".format(capa.perf.counters["evaluate.feature"]),
f"{(min(samples) / float(args.number)):.2f}s",
f"{(sum(samples) / float(args.repeat) / float(args.number)):.2f}s",
f"{(max(samples) / float(args.number)):.2f}s",
)
console.print(table2)
return 0

View File

@@ -12,11 +12,12 @@ import sys
import typing
import logging
import argparse
from typing import Set, Tuple
from typing import Set, List, Tuple
from collections import Counter
import tabulate
from termcolor import colored
from rich import print
from rich.text import Text
from rich.table import Table
import capa.main
import capa.rules
@@ -77,23 +78,30 @@ def get_file_features(
return feature_map
def get_colored(s: str):
def get_colored(s: str) -> Text:
if "(" in s and ")" in s:
s_split = s.split("(", 1)
s_color = colored(s_split[1][:-1], "cyan")
return f"{s_split[0]}({s_color})"
return Text.assemble(s_split[0], "(", (s_split[1][:-1], "cyan"), ")")
else:
return colored(s, "cyan")
return Text(s, style="cyan")
def print_unused_features(feature_map: typing.Counter[Feature], rules_feature_set: Set[Feature]):
unused_features = []
unused_features: List[Tuple[str, Text]] = []
for feature, count in reversed(feature_map.most_common()):
if feature in rules_feature_set:
continue
unused_features.append((str(count), get_colored(str(feature))))
table = Table(title="Unused Features", box=None)
table.add_column("Count", style="dim")
table.add_column("Feature")
for count_str, feature_text in unused_features:
table.add_row(count_str, feature_text)
print("\n")
print(tabulate.tabulate(unused_features, headers=["Count", "Feature"], tablefmt="plain"))
print(table)
print("\n")