"""Detect feature overlap between a new capa rule and the existing rules.

Checks whether any feature used by a new rule already appears in an existing
rule (see issue #1451).

Usage::

    python detect_duplicate_features.py [NEW_RULE.yml] [RULES_DIR]

Both arguments are optional; they default to the sample rule and the ``rules``
directory relative to the current working directory.
"""

import argparse
import os
import sys

DEFAULT_RULES_PATH = "rules"
# Built with os.path.join so the default also resolves on non-Windows hosts.
DEFAULT_NEW_RULE_PATH = os.path.join(
    DEFAULT_RULES_PATH, "anti-analysis", "reference-analysis-tools-strings.yml"
)


def findall_features(features):
    """Flatten nested ``and``/``or`` statements into a flat list of features.

    Args:
        features: the ``features`` sequence of a parsed rule; ``None`` or an
            empty sequence yields an empty list.

    Returns:
        A list with every entry that is not an ``and``/``or`` mapping, in
        document order.  Entries nested under ``and``/``or`` are expanded
        recursively; an entry holding both keys only has ``and`` expanded.
    """
    # NOTE(review): only `and`/`or` are expanded; any other wrapping statement
    # is compared as one opaque entry.  Confirm whether more keys need expanding.
    flat = []
    for feature in features or ():
        # Only mappings can be statements.  Guarding on the type avoids a
        # substring test (and a TypeError) when an entry is a plain string.
        if isinstance(feature, dict):
            if "and" in feature:
                flat.extend(findall_features(feature["and"]))
                continue
            if "or" in feature:
                flat.extend(findall_features(feature["or"]))
                continue
        flat.append(feature)
    return flat


def _load_rule(path):
    """Parse the YAML file at *path*; return the mapping, or None if it is not a rule."""
    # Imported lazily so the helpers that never parse YAML stay importable
    # without PyYAML installed.
    import yaml

    with open(path, "r", encoding="utf-8") as f:
        document = yaml.safe_load(f)
    # An empty file parses to None, and a stray YAML file may hold any type.
    if isinstance(document, dict) and isinstance(document.get("rule"), dict):
        return document
    return None


def find_overlapping_rules(new_rule_path, rules_path):
    """Find existing rules that share at least one feature with a new rule.

    Args:
        new_rule_path: path to the new rule; must end with ``.yml``.
        rules_path: directory that is walked recursively for ``.yml`` rules.

    Returns:
        On success, a dict with ``overlapping_rules`` (paths of the rules that
        share a feature with the new rule) and ``count`` (number of rules
        compared).  The new rule's own file is neither compared nor counted.
        If *new_rule_path* has the wrong extension or is not a rule, an
        ``"ERROR ! ..."`` string is returned instead.

    Raises:
        OSError: if a file cannot be read.
        yaml.YAMLError: if a file is not valid YAML.
    """
    if not new_rule_path.endswith(".yml"):
        return "ERROR ! New rule path file name incorrect"

    new_rule = _load_rule(new_rule_path)
    if new_rule is None:
        return "ERROR ! \ngiven new rule path isn't a rule"

    new_rule_features = findall_features(new_rule["rule"].get("features"))
    # The new rule usually lives inside rules_path; resolve it once so it can
    # be skipped instead of being reported as overlapping with itself.
    new_rule_realpath = os.path.realpath(new_rule_path)

    overlapping_rules = []
    count = 0
    for dirpath, dirnames, filenames in os.walk(rules_path):
        dirnames.sort()  # in-place sort makes the traversal order deterministic
        for filename in sorted(filenames):
            if not filename.endswith(".yml"):
                continue
            rule_path = os.path.join(dirpath, filename)
            if os.path.realpath(rule_path) == new_rule_realpath:
                continue
            rule = _load_rule(rule_path)
            if rule is None:
                continue
            rule_features = findall_features(rule["rule"].get("features"))
            count += 1
            # Features are dicts (unhashable), hence list membership, not sets.
            if any(feature in rule_features for feature in new_rule_features):
                overlapping_rules.append(rule_path)

    return {"overlapping_rules": overlapping_rules, "count": count}


def main(argv=None):
    """Run the overlap check from the command line and return an exit status."""
    parser = argparse.ArgumentParser(
        description="Detect feature overlap between a new rule and existing rules."
    )
    parser.add_argument(
        "new_rule_path",
        nargs="?",
        default=DEFAULT_NEW_RULE_PATH,
        help="path to the new rule (default: %(default)s)",
    )
    parser.add_argument(
        "rules_path",
        nargs="?",
        default=DEFAULT_RULES_PATH,
        help="directory containing the existing rules (default: %(default)s)",
    )
    args = parser.parse_args(argv)

    try:
        result = find_overlapping_rules(args.new_rule_path, args.rules_path)
    except Exception as exc:  # top-level boundary: report the failure and exit non-zero
        print(f"{type(exc).__name__}: {exc}", file=sys.stderr)
        return 1

    # Validation failures come back as a message string, not as a result dict.
    if isinstance(result, str):
        print(result, file=sys.stderr)
        return 1

    overlapping = result["overlapping_rules"]
    print(f"New rule path : {args.new_rule_path}")
    print(f"Number of rules checked : {result['count']} ")
    print("Paths to overlapping rules : ", overlapping)
    print(f"Number of rules containing same features : {len(overlapping)}")
    return 0


if __name__ == "__main__":
    sys.exit(main())