refactor main to for ease of integration (#1948)

* main: split main into a bunch of "main routines"

[wip] since there are a few references to BinExport2
that are in progress elsewhre. Next commit will remove them.

* main: remove references to wip BinExport2 code

* changelog

* main: rename first position argument "input_file"

closes #1946

* main: linters

* main: move rule-related routines to capa.rules

ref #1821

* main: extract routines to capa.loader module

closes #1821

* add loader module

* loader: learn to load freeze format

* freeze: use new cli arg handling

* Update capa/loader.py

Co-authored-by: Moritz <mr-tz@users.noreply.github.com>

* main: remove duplicate documentation

* main: add doc about where some functions live

* scripts: migrate to new main wrapper helper functions

* scripts: port to main routines

* main: better handle auto-detection of backend

* scripts: migrate bulk-process to main wrappers

* scripts: migrate scripts to main wrappers

* main: rename *_from_args to *_from_cli

* changelog

* cache-ruleset: remove duplication

* main: fix tag handling

* cache-ruleset: fix cli args

* cache-ruleset: fix special rule cli handling

* scripts: fix type bytes

* main: remove old TODO message

* loader: fix references to binja extractor

---------

Co-authored-by: Moritz <mr-tz@users.noreply.github.com>
This commit is contained in:
Willi Ballenthin
2024-01-29 13:59:05 +01:00
committed by GitHub
parent d2e1a47192
commit c3301d3b3f
26 changed files with 1321 additions and 1168 deletions

View File

@@ -55,13 +55,11 @@ Unless required by applicable law or agreed to in writing, software distributed
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
"""
import os
import sys
import logging
import argparse
import collections
from typing import Dict
from pathlib import Path
import colorama
@@ -76,10 +74,7 @@ import capa.render.verbose
import capa.features.freeze
import capa.capabilities.common
import capa.render.result_document as rd
from capa.helpers import get_file_taste
from capa.features.common import FORMAT_AUTO
from capa.features.freeze import Address
from capa.features.extractors.base_extractor import FeatureExtractor, StaticFeatureExtractor
logger = logging.getLogger("capa.show-capabilities-by-function")
@@ -142,67 +137,37 @@ def main(argv=None):
argv = sys.argv[1:]
parser = argparse.ArgumentParser(description="detect capabilities in programs.")
capa.main.install_common_args(parser, wanted={"format", "os", "backend", "sample", "signatures", "rules", "tag"})
capa.main.install_common_args(
parser, wanted={"format", "os", "backend", "input_file", "signatures", "rules", "tag"}
)
args = parser.parse_args(args=argv)
capa.main.handle_common_args(args)
try:
taste = get_file_taste(Path(args.sample))
except IOError as e:
logger.error("%s", str(e))
return -1
try:
rules = capa.main.get_rules(args.rules)
logger.info("successfully loaded %s rules", len(rules))
if args.tag:
rules = rules.filter_rules_by_meta(args.tag)
logger.info("selected %s rules", len(rules))
except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
logger.error("%s", str(e))
return -1
try:
sig_paths = capa.main.get_signatures(args.signatures)
except IOError as e:
logger.error("%s", str(e))
return -1
if (args.format == "freeze") or (args.format == FORMAT_AUTO and capa.features.freeze.is_freeze(taste)):
format_ = "freeze"
extractor: FeatureExtractor = capa.features.freeze.load(Path(args.sample).read_bytes())
else:
format_ = args.format
should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
try:
extractor = capa.main.get_extractor(
args.sample, args.format, args.os, args.backend, sig_paths, should_save_workspace
)
assert isinstance(extractor, StaticFeatureExtractor)
except capa.exceptions.UnsupportedFormatError:
capa.helpers.log_unsupported_format_error()
return -1
except capa.exceptions.UnsupportedRuntimeError:
capa.helpers.log_unsupported_runtime_error()
return -1
capa.main.handle_common_args(args)
capa.main.ensure_input_exists_from_cli(args)
input_format = capa.main.get_input_format_from_cli(args)
rules = capa.main.get_rules_from_cli(args)
backend = capa.main.get_backend_from_cli(args, input_format)
sample_path = capa.main.get_sample_path_from_cli(args, backend)
if sample_path is None:
os_ = "unknown"
else:
os_ = capa.loader.get_os(sample_path)
extractor = capa.main.get_extractor_from_cli(args, input_format, backend)
except capa.main.ShouldExitError as e:
return e.status_code
capabilities, counts = capa.capabilities.common.find_capabilities(rules, extractor)
meta = capa.main.collect_metadata(argv, args.sample, format_, args.os, args.rules, extractor, counts)
meta.analysis.layout = capa.main.compute_layout(rules, extractor, capabilities)
meta = capa.loader.collect_metadata(argv, args.input_file, input_format, os_, args.rules, extractor, counts)
meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities)
if capa.capabilities.common.has_file_limitation(rules, capabilities):
# bail if capa encountered file limitation e.g. a packed binary
# do show the output in verbose mode, though.
if not (args.verbose or args.vverbose or args.json):
return -1
return capa.main.E_FILE_LIMITATION
# colorama will detect:
# - when on Windows console, and fixup coloring, and
# - when not an interactive session, and disable coloring
# renderers should use coloring and assume it will be stripped out if necessary.
colorama.init()
doc = rd.ResultDocument.from_capa(meta, rules, capabilities)
print(render_matches_by_function(doc))
colorama.deinit()