Files
capa/scripts/detect_duplicate_features.py
Willi Ballenthin c3301d3b3f refactor main for ease of integration (#1948)
* main: split main into a bunch of "main routines"

[wip] since there are a few references to BinExport2
that are in progress elsewhere. Next commit will remove them.

* main: remove references to wip BinExport2 code

* changelog

* main: rename first positional argument "input_file"

closes #1946

* main: linters

* main: move rule-related routines to capa.rules

ref #1821

* main: extract routines to capa.loader module

closes #1821

* add loader module

* loader: learn to load freeze format

* freeze: use new cli arg handling

* Update capa/loader.py

Co-authored-by: Moritz <mr-tz@users.noreply.github.com>

* main: remove duplicate documentation

* main: add doc about where some functions live

* scripts: migrate to new main wrapper helper functions

* scripts: port to main routines

* main: better handle auto-detection of backend

* scripts: migrate bulk-process to main wrappers

* scripts: migrate scripts to main wrappers

* main: rename *_from_args to *_from_cli

* changelog

* cache-ruleset: remove duplication

* main: fix tag handling

* cache-ruleset: fix cli args

* cache-ruleset: fix special rule cli handling

* scripts: fix type bytes

* main: remove old TODO message

* loader: fix references to binja extractor

---------

Co-authored-by: Moritz <mr-tz@users.noreply.github.com>
2024-01-29 13:59:05 +01:00

96 lines
3.2 KiB
Python

# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import sys
import logging
import argparse
from typing import Set
from pathlib import Path
import capa.main
import capa.rules
from capa.features.common import Feature
# Module-level logger for this script, registered under the fixed name "detect_duplicate_features".
logger = logging.getLogger("detect_duplicate_features")
def get_features(rule_path: str) -> Set[Feature]:
    """
    Parse a single rule file and collect every feature it references.

    Args:
        rule_path (str): Location of the YAML rule file to read.

    Returns:
        set: Whatever the parsed rule's extract_all_features() reports.

    Any exception raised while reading or parsing the already-opened file is
    logged and the process is terminated with sys.exit(-1). Errors raised while
    opening the file occur outside that handler and propagate to the caller.
    """
    with Path(rule_path).open("r", encoding="utf-8") as rule_file:
        try:
            parsed_rule = capa.rules.Rule.from_yaml(rule_file.read())
            features = parsed_rule.extract_all_features()
        except Exception as err:
            logger.error("Error: New rule %s %s %s", rule_path, str(type(err)), str(err))
            sys.exit(-1)
        return features
def find_overlapping_rules(new_rule_path, rules_path):
    """
    Find existing rules that share at least one feature with a new rule.

    Args:
        new_rule_path (str): Path to the new rule file; must end with ".yml".
        rules_path: Rule locations handed unchanged to capa.rules.get_rules()
            (main() passes a list of pathlib.Path objects).

    Returns:
        dict: {"overlapping_rules": [...], "count": n}. "overlapping_rules" holds the
        keys of ruleset.rules whose features intersect the new rule's features, and
        "count" is the number of existing rules that had at least one feature and
        were therefore compared.

    Terminates the process with sys.exit(-1) when new_rule_path does not end with ".yml".
    """
    if not new_rule_path.endswith(".yml"):
        logger.error("FileNotFoundError ! New rule file name doesn't end with .yml")
        sys.exit(-1)

    # Features of the new rule, collected once up front.
    new_rule_features = get_features(new_rule_path)

    count = 0
    overlapping_rules = []

    # capa.rules.RuleSet stores all rules in given paths
    ruleset = capa.rules.get_rules(rules_path)

    for rule_name, rule in ruleset.rules.items():
        rule_features = rule.extract_all_features()

        # Rules that expose no features cannot overlap and are not counted as checked.
        if not rule_features:
            continue
        count += 1

        # A single shared feature is enough to flag the existing rule.
        if not new_rule_features.isdisjoint(rule_features):
            overlapping_rules.append(rule_name)

    return {"overlapping_rules": overlapping_rules, "count": count}
def main():
    """
    Command-line entry point: compare a new rule against existing rules.

    Parses the two positional arguments ("rules" and "new_rule"), runs
    find_overlapping_rules() and prints a summary to stdout.

    Returns:
        int: The number of existing rules that overlap with the new rule.
    """
    cli = argparse.ArgumentParser(description="Find overlapping features in Capa rules.")
    cli.add_argument("rules", type=str, action="append", help="Path to rules")
    cli.add_argument("new_rule", type=str, help="Path to new rule")
    parsed = cli.parse_args()

    candidate = parsed.new_rule
    rule_locations = [Path(entry) for entry in parsed.rules]

    report = find_overlapping_rules(candidate, rule_locations)
    checked = report["count"]
    overlaps = report["overlapping_rules"]

    print(f"\nNew rule path : {candidate}")
    print(f"Number of rules checked : {checked} ")
    if not overlaps:
        print("Paths to overlapping rules : None")
    else:
        print("Paths to overlapping rules : ")
        for name in overlaps:
            print(f"- {name}")
    print(f"Number of rules containing same features : {len(overlaps)}")
    print("\n")

    return len(overlaps)
if __name__ == "__main__":
    # main() returns the number of overlapping rules, which is used as the exit status.
    # POSIX keeps only the low 8 bits of an exit status, so an unclamped count of 256
    # would be reported as 0 (success). Clamp it so any overlap stays non-zero.
    sys.exit(min(main(), 255))