Update detect_duplicate_features.py

Load the YAML rule files using capa.rules.Rule.from_yaml.
Return any exceptions/errors occurring while checking the files.
This commit is contained in:
Aayush Goel
2023-05-06 17:19:57 +05:30
parent 30516c33b7
commit 9eacf72366

View File

@@ -1,63 +1,61 @@
import os
import argparse
import yaml
import capa.rules
import capa.engine as ceng
def findall_features(features):
    """Flatten a nested list of feature entries into a single list.

    Entries containing an ``"and"`` or ``"or"`` key are treated as groups:
    the value under that key is flattened recursively and its leaves are
    spliced into the result. Every other entry is kept as-is.

    Args:
        features: iterable of feature entries (as loaded from a rule's
            ``features`` section).

    Returns:
        A flat list of the leaf entries, in traversal order.
    """
    feature_list = []
    for feature in features:
        if "and" in feature:
            feature_list.extend(findall_features(feature["and"]))
        elif "or" in feature:
            feature_list.extend(findall_features(feature["or"]))
        else:
            feature_list.append(feature)
    return feature_list


def get_child_features(feature):
    """Collect the leaf features reachable from a rule statement node.

    Args:
        feature: a node of a parsed rule's statement tree.

    Returns:
        A flat list of every node that is not one of the container
        statement types handled below, in depth-first order.
    """
    children = []
    if isinstance(feature, (ceng.And, ceng.Or, ceng.Some)):
        # Multi-child statements expose their operands via ``.children``.
        for child in feature.children:
            children.extend(get_child_features(child))
    elif isinstance(feature, (ceng.Subscope, ceng.Range, ceng.Not)):
        # Single-child wrappers expose their operand via ``.child``.
        children.extend(get_child_features(feature.child))
    else:
        # Anything else is treated as a leaf feature.
        children.append(feature)
    return children
def get_features(rule_path, errors):
    """Parse one rule file and return its flattened list of features.

    Any failure while opening, reading or parsing the file is recorded in
    ``errors`` instead of being raised, so that a single bad file does not
    abort a scan over many rules.

    Args:
        rule_path: path to the rule file to load.
        errors: list that collects error messages; it is appended to in
            place and also returned.

    Returns:
        A ``(feature_list, errors)`` tuple. ``feature_list`` is empty when
        the rule could not be loaded.
    """
    feature_list = []
    try:
        # Opening the file is inside the ``try`` so that I/O failures are
        # reported the same way as parse failures.
        # NOTE(review): assumes rule files are UTF-8 encoded — confirm.
        with open(rule_path, "r", encoding="utf-8") as f:
            new_rule = capa.rules.Rule.from_yaml(f.read())
        feature_list = get_child_features(new_rule.statement)
    except Exception as e:
        errors.append("rule :" + rule_path + " " + str(type(e)) + " " + str(e))
    return feature_list, errors
def find_overlapping_rules(new_rule_path, rules_path):
    """Find existing rules that share at least one feature with a new rule.

    Args:
        new_rule_path: path to the new rule file; must end with ``.yml``.
        rules_path: iterable of directories that are walked recursively
            for ``.yml`` rule files to compare against.

    Returns:
        A dict with three keys:
        ``"overlapping_rules"`` — paths of rules sharing a feature with the
        new rule; ``"count"`` — number of rules that yielded at least one
        feature and were therefore compared; ``"errors"`` — messages for
        rule files that could not be loaded.

    Raises:
        FileNotFoundError: if ``new_rule_path`` does not end with ``.yml``.
        Warning: if the new rule itself could not be loaded.
    """
    if not new_rule_path.endswith(".yml"):
        raise FileNotFoundError("FileNotFoundError ! New rule file name doesn't end with yml")

    new_rule_features, error = get_features(new_rule_path, [])
    if error:
        raise Warning(error[0])

    errors: list = []
    count = 0
    overlapping_rules = []
    for rules in rules_path:
        for dirpath, _dirnames, filenames in os.walk(rules):
            for filename in filenames:
                if not filename.endswith(".yml"):
                    continue
                rule_path = os.path.join(dirpath, filename)
                rule_features, errors = get_features(rule_path, errors)
                # Rules that failed to load (or have no features) are not
                # counted as checked.
                if not rule_features:
                    continue
                count += 1
                if any(feature in rule_features for feature in new_rule_features):
                    overlapping_rules.append(rule_path)

    return {"overlapping_rules": overlapping_rules, "count": count, "errors": errors}
# python script.py --base-dir /path/to/capa/rules rules/anti-analysis/reference-analysis-tools-strings.yml rules
def main():
parser = argparse.ArgumentParser(description="Find overlapping features in Capa rules.")
@@ -68,19 +66,18 @@ def main():
new_rule_path = args.new_rule
rules_path = args.rules
try:
result = find_overlapping_rules(new_rule_path, rules_path)
print("New rule path : %s" % new_rule_path)
print("\nNew rule path : %s" % new_rule_path)
print("Number of rules checked : %s " % result["count"])
print("Paths to overlapping rules : ", result["overlapping_rules"])
print("Number of rules containing same features : %s" % len(result["overlapping_rules"]))
print("\nWhile checking following .yml files error occured:")
for error in result["errors"]:
print(error)
except Exception as e:
print(e)
try:
print(result)
except:
pass
if __name__ == "__main__":