introduce flake8-todos linter

This commit is contained in:
Willi Ballenthin
2023-07-09 23:35:52 +02:00
parent 4a49543d12
commit ae10a2ea34
19 changed files with 41 additions and 41 deletions

View File

@@ -21,6 +21,7 @@ if TYPE_CHECKING:
import capa.perf import capa.perf
import capa.features import capa.features
import capa.features.extractors.elf import capa.features.extractors.elf
import capa.features.freeze.features
from capa.features.address import Address from capa.features.address import Address
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -127,9 +128,9 @@ class Feature(abc.ABC): # noqa: B024
return self.name == other.name and self.value == other.value return self.name == other.name and self.value == other.value
def __lt__(self, other): def __lt__(self, other):
# TODO: this is a huge hack! # implementing sorting by serializing to JSON is a huge hack.
import capa.features.freeze.features # it's slow, inelegant, and probably doesn't work intuitively;
# however, we only use it for deterministic output, so it's good enough for now.
return ( return (
capa.features.freeze.features.feature_from_capa(self).json() capa.features.freeze.features.feature_from_capa(self).json()
< capa.features.freeze.features.feature_from_capa(other).json() < capa.features.freeze.features.feature_from_capa(other).json()
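For readers skimming the new comment: sorting features by their serialized JSON form gives a stable, deterministic ordering without defining per-type comparison rules. A minimal sketch of the idea, using a hypothetical stand-in rather than capa's real feature classes:

import json
from dataclasses import dataclass

@dataclass(frozen=True)
class FakeFeature:
    # hypothetical stand-in for a capa feature; not the real class hierarchy
    type: str
    value: str

    def to_json(self) -> str:
        # serialize with sorted keys so the comparison key is stable across runs
        return json.dumps({"type": self.type, "value": self.value}, sort_keys=True)

    def __lt__(self, other: "FakeFeature") -> bool:
        # slow and inelegant, but deterministic: compare serialized forms lexicographically
        return self.to_json() < other.to_json()

features = [FakeFeature("string", "error"), FakeFeature("api", "CreateFile")]
print(sorted(features))  # same order on every run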

View File

@@ -41,7 +41,9 @@ def unmangle_c_name(name: str) -> str:
# _lstrlenWStub@4 # _lstrlenWStub@4
# A small optimization to avoid running the regex too many times # A small optimization to avoid running the regex too many times
# TODO: this still increases the unit test execution time from 170s to 200s, should be able to accelerate it # this still increases the unit test execution time from 170s to 200s; we should be able to accelerate it
#
# TODO(xusheng): performance optimizations to improve test execution time
if name[0] in ["@", "_"]: if name[0] in ["@", "_"]:
match = re.match(r"^[@|_](.*?)(Stub)?(@\d+)?$", name) match = re.match(r"^[@|_](.*?)(Stub)?(@\d+)?$", name)
if match: if match:
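As a quick illustration of what the regex above recovers from decorated names like _lstrlenWStub@4 (the extra test names below are made up):

import re

def unmangle(name: str) -> str:
    # same pattern as above: strip a leading @/_ prefix, an optional "Stub" suffix,
    # and an optional @<argbytes> stdcall decoration
    if name and name[0] in ("@", "_"):
        match = re.match(r"^[@|_](.*?)(Stub)?(@\d+)?$", name)
        if match:
            return match.group(1)
    return name

assert unmangle("_lstrlenWStub@4") == "lstrlenW"
assert unmangle("@HeapFree@12") == "HeapFree"
assert unmangle("CreateFileA") == "CreateFileA"  # no prefix, returned unchanged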

View File

@@ -324,7 +324,7 @@ def extract_insn_offset_features(
def is_nzxor_stack_cookie(f: Function, bb: BinjaBasicBlock, llil: LowLevelILInstruction) -> bool: def is_nzxor_stack_cookie(f: Function, bb: BinjaBasicBlock, llil: LowLevelILInstruction) -> bool:
"""check if nzxor exists within stack cookie delta""" """check if nzxor exists within stack cookie delta"""
# TODO: we can do a much accurate analysi using LLIL SSA # TODO(xusheng): use LLIL SSA to do more accurate analysis
reg_names = [] reg_names = []
if llil.left.operation == LowLevelILOperation.LLIL_REG: if llil.left.operation == LowLevelILOperation.LLIL_REG:

View File

@@ -36,8 +36,8 @@ def extract_file_import_names(elf, **kwargs):
for _, symbol in enumerate(section.iter_symbols()): for _, symbol in enumerate(section.iter_symbols()):
if symbol.name and symbol.entry.st_info.type == "STT_FUNC": if symbol.name and symbol.entry.st_info.type == "STT_FUNC":
# TODO symbol address # TODO(williballenthin): extract symbol address
# TODO symbol version info? # TODO(williballenthin): is it useful to extract the symbol version info?
yield Import(symbol.name), FileOffsetAddress(0x0) yield Import(symbol.name), FileOffsetAddress(0x0)
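The new TODO notes that the symbol address isn't extracted yet. A rough sketch of how it might be pulled out with pyelftools, assuming st_value of an STT_FUNC symbol is the address we want (not wired into capa's extractor interface):

from elftools.elf.elffile import ELFFile
from elftools.elf.sections import SymbolTableSection

def iter_function_symbols(path: str):
    # yield (name, address) pairs for function symbols in every symbol table section
    with open(path, "rb") as f:
        elf = ELFFile(f)
        for section in elf.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue
            for symbol in section.iter_symbols():
                if symbol.name and symbol.entry.st_info.type == "STT_FUNC":
                    # st_value holds the symbol's value, typically its virtual address
                    yield symbol.name, symbol.entry.st_value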
@@ -68,7 +68,7 @@ def extract_file_format(**kwargs):
def extract_file_arch(elf, **kwargs): def extract_file_arch(elf, **kwargs):
# TODO merge with capa.features.extractors.elf.detect_elf_arch() # TODO(williballenthin): merge with capa.features.extractors.elf.detect_elf_arch()
arch = elf.get_machine_arch() arch = elf.get_machine_arch()
if arch == "x86": if arch == "x86":
yield Arch("i386"), NO_ADDRESS yield Arch("i386"), NO_ADDRESS
@@ -85,7 +85,7 @@ def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, i
FILE_HANDLERS = ( FILE_HANDLERS = (
# TODO extract_file_export_names, # TODO(williballenthin): implement extract_file_export_names
extract_file_import_names, extract_file_import_names,
extract_file_section_names, extract_file_section_names,
extract_file_strings, extract_file_strings,

View File

@@ -28,7 +28,7 @@ def find_byte_sequence(start: int, end: int, seq: bytes) -> Iterator[int]:
""" """
seqstr = " ".join([f"{b:02x}" for b in seq]) seqstr = " ".join([f"{b:02x}" for b in seq])
while True: while True:
# TODO find_binary: Deprecated. Please use ida_bytes.bin_search() instead. # TODO(mike-hunhoff): find_binary is deprecated. Please use ida_bytes.bin_search() instead.
ea = idaapi.find_binary(start, end, seqstr, 0, idaapi.SEARCH_DOWN) ea = idaapi.find_binary(start, end, seqstr, 0, idaapi.SEARCH_DOWN)
if ea == idaapi.BADADDR: if ea == idaapi.BADADDR:
break break
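For reference, seqstr above is the space-separated hex text that find_binary expects; a tiny example (the byte pattern is illustrative):

seq = b"\x64\xa1\x30\x00\x00\x00"  # illustrative byte pattern
seqstr = " ".join(f"{b:02x}" for b in seq)
print(seqstr)  # 64 a1 30 00 00 00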
@@ -106,7 +106,8 @@ def get_file_imports() -> Dict[int, Tuple[str, str, int]]:
# IDA uses section names for the library of ELF imports, like ".dynsym". # IDA uses section names for the library of ELF imports, like ".dynsym".
# These are not useful to us; we may need to expand this list over time # These are not useful to us; we may need to expand this list over time
# TODO: exhaust this list, see #1419 # TODO(williballenthin): find all section names used by IDA
# https://github.com/mandiant/capa/issues/1419
if library == ".dynsym": if library == ".dynsym":
library = "" library = ""

View File

@@ -405,7 +405,8 @@ def extract_insn_peb_access_characteristic_features(
disasm = idc.GetDisasm(insn.ea) disasm = idc.GetDisasm(insn.ea)
if " fs:30h" in disasm or " gs:60h" in disasm: if " fs:30h" in disasm or " gs:60h" in disasm:
# TODO: replace above with proper IDA # TODO(mike-hunhoff): use proper IDA API for fetching segment access
# scanning the disassembly text is a hack.
yield Characteristic("peb access"), ih.address yield Characteristic("peb access"), ih.address
@@ -426,11 +427,13 @@ def extract_insn_segment_access_features(
disasm = idc.GetDisasm(insn.ea) disasm = idc.GetDisasm(insn.ea)
if " fs:" in disasm: if " fs:" in disasm:
# TODO: replace above with proper IDA # TODO(mike-hunhoff): use proper IDA API for fetching segment access
# scanning the disassembly text is a hack.
yield Characteristic("fs access"), ih.address yield Characteristic("fs access"), ih.address
if " gs:" in disasm: if " gs:" in disasm:
# TODO: replace above with proper IDA # TODO(mike-hunhoff): use proper IDA API for fetching segment access
# scanning the disassembly text is a hack.
yield Characteristic("gs access"), ih.address yield Characteristic("gs access"), ih.address

View File

@@ -92,7 +92,6 @@ def is_mov_imm_to_stack(instr: envi.archs.i386.disasm.i386Opcode) -> bool:
if not src.isImmed(): if not src.isImmed():
return False return False
# TODO what about 64-bit operands?
if not isinstance(dst, envi.archs.i386.disasm.i386SibOper) and not isinstance( if not isinstance(dst, envi.archs.i386.disasm.i386SibOper) and not isinstance(
dst, envi.archs.i386.disasm.i386RegMemOper dst, envi.archs.i386.disasm.i386RegMemOper
): ):

View File

@@ -351,7 +351,6 @@ def is_security_cookie(f, bb, insn) -> bool:
if oper.isReg() and oper.reg not in [ if oper.isReg() and oper.reg not in [
envi.archs.i386.regs.REG_ESP, envi.archs.i386.regs.REG_ESP,
envi.archs.i386.regs.REG_EBP, envi.archs.i386.regs.REG_EBP,
# TODO: do x64 support for real.
envi.archs.amd64.regs.REG_RBP, envi.archs.amd64.regs.REG_RBP,
envi.archs.amd64.regs.REG_RSP, envi.archs.amd64.regs.REG_RSP,
]: ]:
@@ -422,7 +421,6 @@ def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> It
""" """
parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64 parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64
""" """
# TODO handle where fs/gs are loaded into a register or onto the stack and used later
insn: envi.Opcode = ih.inner insn: envi.Opcode = ih.inner
if insn.mnem not in ["push", "mov"]: if insn.mnem not in ["push", "mov"]:
@@ -646,7 +644,6 @@ def extract_op_offset_features(
if oper.reg == envi.archs.i386.regs.REG_EBP: if oper.reg == envi.archs.i386.regs.REG_EBP:
return return
# TODO: do x64 support for real.
if oper.reg == envi.archs.amd64.regs.REG_RBP: if oper.reg == envi.archs.amd64.regs.REG_RBP:
return return

View File

@@ -37,13 +37,13 @@ class CapaRuleGenFeatureCacheNode:
self.children: Set[CapaRuleGenFeatureCacheNode] = set() self.children: Set[CapaRuleGenFeatureCacheNode] = set()
def __hash__(self): def __hash__(self):
# TODO: unique enough? # TODO(mike-hunhoff): confirm this is unique enough
return hash((self.address,)) return hash((self.address,))
def __eq__(self, other): def __eq__(self, other):
if not isinstance(other, type(self)): if not isinstance(other, type(self)):
return NotImplemented return NotImplemented
# TODO: unique enough? # TODO(mike-hunhoff): confirm this is unique enough
return self.address == other.address return self.address == other.address
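The "unique enough" question comes down to this: because identity is keyed on the address alone, two nodes at the same address collapse into one set member regardless of any other state. A tiny illustration with a stand-in class (not the real cache node):

class Node:
    # stand-in for CapaRuleGenFeatureCacheNode: identity is the address only
    def __init__(self, address, features):
        self.address = address
        self.features = features

    def __hash__(self):
        return hash((self.address,))

    def __eq__(self, other):
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.address == other.address

a = Node(0x401000, {"api(CreateFile)"})
b = Node(0x401000, {"mnemonic(xor)"})
print(len({a, b}))  # 1: differing features don't matter, only the address does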

View File

@@ -791,7 +791,7 @@ class CapaExplorerForm(idaapi.PluginForm):
try: try:
# support binary files specifically for x86/AMD64 shellcode # support binary files specifically for x86/AMD64 shellcode
# warn user binary file is loaded but still allow capa to process it # warn user binary file is loaded but still allow capa to process it
# TODO: check specific architecture of binary files based on how user configured IDA processors # TODO(mike-hunhoff): check specific architecture of binary files based on how user configured IDA processors
if idaapi.get_file_type_name() == "Binary file": if idaapi.get_file_type_name() == "Binary file":
logger.warning("-" * 80) logger.warning("-" * 80)
logger.warning(" Input file appears to be a binary file.") logger.warning(" Input file appears to be a binary file.")

View File

@@ -372,7 +372,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
display += f" ({statement.description})" display += f" ({statement.description})"
return CapaExplorerDefaultItem(parent, display) return CapaExplorerDefaultItem(parent, display)
elif isinstance(statement, rd.CompoundStatement) and statement.type == rd.CompoundStatementType.NOT: elif isinstance(statement, rd.CompoundStatement) and statement.type == rd.CompoundStatementType.NOT:
# TODO: do we display 'not' # TODO(mike-hunhoff): verify that we can display NOT statements
pass pass
elif isinstance(statement, rd.SomeStatement): elif isinstance(statement, rd.SomeStatement):
display = f"{statement.count} or more" display = f"{statement.count} or more"
@@ -421,7 +421,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
@param doc: result doc @param doc: result doc
""" """
if not match.success: if not match.success:
# TODO: display failed branches at some point? Help with debugging rules? # TODO(mike-hunhoff): display failed branches at some point? Help with debugging rules?
return return
# optional statement with no successful children is empty # optional statement with no successful children is empty

View File

@@ -502,7 +502,7 @@ class CapaExplorerRulegenEditor(QtWidgets.QTreeWidget):
feature, description, comment = (o.strip() for o in tuple(o.text(i) for i in range(3))) feature, description, comment = (o.strip() for o in tuple(o.text(i) for i in range(3)))
rule_text += parse_node_for_feature(feature, description, comment, calc_item_depth(o)) rule_text += parse_node_for_feature(feature, description, comment, calc_item_depth(o))
# FIXME we avoid circular update by disabling signals when updating # TODO(mike-hunhoff): we avoid circular update by disabling signals when updating
# the preview. Preferably we would refactor the code to avoid this # the preview. Preferably we would refactor the code to avoid this
# in the first place # in the first place
self.preview.blockSignals(True) self.preview.blockSignals(True)
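For context, blocking signals around a programmatic update is the usual Qt workaround for this kind of feedback loop; a minimal sketch with illustrative names (not the plugin's actual widgets):

def set_preview_text(preview, text: str) -> None:
    # suppress signals so updating the preview doesn't re-trigger the
    # change handler that rebuilt the preview in the first place
    preview.blockSignals(True)
    try:
        preview.setPlainText(text)
    finally:
        # always restore signal delivery, even if the update raises
        preview.blockSignals(False)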

View File

@@ -326,10 +326,9 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro
return matches, meta return matches, meta
# TODO move all to helpers? def has_rule_with_namespace(rules: RuleSet, capabilities: MatchResults, namespace: str) -> bool:
def has_rule_with_namespace(rules, capabilities, rule_cat):
for rule_name in capabilities.keys(): for rule_name in capabilities.keys():
if rules.rules[rule_name].meta.get("namespace", "").startswith(rule_cat): if rules.rules[rule_name].meta.get("namespace", "").startswith(namespace):
return True return True
return False return False
@@ -484,7 +483,6 @@ def get_workspace(path, format_, sigpaths):
import viv_utils.flirt import viv_utils.flirt
logger.debug("generating vivisect workspace for: %s", path) logger.debug("generating vivisect workspace for: %s", path)
# TODO should not be auto at this point, anymore
if format_ == FORMAT_AUTO: if format_ == FORMAT_AUTO:
if not is_supported_format(path): if not is_supported_format(path):
raise UnsupportedFormatError() raise UnsupportedFormatError()
@@ -509,7 +507,6 @@ def get_workspace(path, format_, sigpaths):
return vw return vw
# TODO get_extractors -> List[FeatureExtractor]?
def get_extractor( def get_extractor(
path: str, path: str,
format_: str, format_: str,
@@ -1165,8 +1162,8 @@ def main(argv=None):
rules = rules.filter_rules_by_meta(args.tag) rules = rules.filter_rules_by_meta(args.tag)
logger.debug("selected %d rules", len(rules)) logger.debug("selected %d rules", len(rules))
for i, r in enumerate(rules.rules, 1): for i, r in enumerate(rules.rules, 1):
# TODO don't display subscope rules?
logger.debug(" %d. %s", i, r) logger.debug(" %d. %s", i, r)
except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e: except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
logger.error("%s", str(e)) logger.error("%s", str(e))
logger.error( logger.error(

View File

@@ -22,7 +22,7 @@ def get_node_cost(node):
# substring and regex features require a full scan of each string # substring and regex features require a full scan of each string
# which we anticipate is more expensive than a hash lookup feature (e.g. mnemonic or count). # which we anticipate is more expensive than a hash lookup feature (e.g. mnemonic or count).
# #
# TODO: compute the average cost of these feature relative to hash feature # fun research: compute the average cost of these features relative to hash features
# and adjust the factor accordingly. # and adjust the factor accordingly.
return 2 return 2
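The comment invites measuring the actual cost ratio; a rough way to do that with the standard library (sizes and strings are made up, only the shape of the measurement matters):

import re
import timeit

strings = [f"string-{i}" for i in range(10_000)]
lookup = set(strings)
pattern = re.compile("error")

hash_cost = timeit.timeit(lambda: "string-9999" in lookup, number=100_000) / 100_000
scan_cost = timeit.timeit(lambda: any(pattern.search(s) for s in strings), number=1_000) / 1_000

print(f"one regex scan costs roughly {scan_cost / hash_cost:.0f}x one hash lookup")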

View File

@@ -510,7 +510,9 @@ def build_statements(d, scope: str):
# arg is string (which doesn't support inline descriptions), like: # arg is string (which doesn't support inline descriptions), like:
# #
# count(string(error)) # count(string(error))
# TODO: what about embedded newlines? #
# embedded newlines are a known concern and may not work here;
# this may or may not become a problem in practice, so address it when encountered.
feature = Feature(arg) feature = Feature(arg)
else: else:
feature = Feature() feature = Feature()
@@ -1190,7 +1192,6 @@ class RuleSet:
# so that's not helpful to decide how to downselect. # so that's not helpful to decide how to downselect.
# #
# and, a global rule will never be the sole selector in a rule. # and, a global rule will never be the sole selector in a rule.
# TODO: probably want a lint for this.
pass pass
else: else:
# easy feature: hash lookup # easy feature: hash lookup

View File

@@ -54,7 +54,7 @@ var_names = ["".join(letters) for letters in itertools.product(string.ascii_lowe
# these have to be the internal names used by capa.py, which are sometimes different from the ones written out in the rules, e.g. "2 or more" is "Some", count is Range # these have to be the internal names used by capa.py, which are sometimes different from the ones written out in the rules, e.g. "2 or more" is "Some", count is Range
unsupported = ["characteristic", "mnemonic", "offset", "subscope", "Range"] unsupported = ["characteristic", "mnemonic", "offset", "subscope", "Range"]
# TODO shorten this list, possible stuff: # further idea: shorten this list; possible candidates:
# - 2 or more strings: e.g. # - 2 or more strings: e.g.
# -- https://github.com/mandiant/capa-rules/blob/master/collection/file-managers/gather-direct-ftp-information.yml # -- https://github.com/mandiant/capa-rules/blob/master/collection/file-managers/gather-direct-ftp-information.yml
# -- https://github.com/mandiant/capa-rules/blob/master/collection/browser/gather-firefox-profile-information.yml # -- https://github.com/mandiant/capa-rules/blob/master/collection/browser/gather-firefox-profile-information.yml
@@ -172,14 +172,16 @@ def convert_rule(rule, rulename, cround, depth):
yara_strings += "\t$" + var_name + ' = "' + string + '" ascii wide' + convert_description(kid) + "\n" yara_strings += "\t$" + var_name + ' = "' + string + '" ascii wide' + convert_description(kid) + "\n"
yara_condition += "\t$" + var_name + " " yara_condition += "\t$" + var_name + " "
elif s_type == "api" or s_type == "import": elif s_type == "api" or s_type == "import":
# TODO: is it possible in YARA to make a difference between api & import? # research needed to decide if it's possible in YARA to distinguish between api & import
# https://github.com/mandiant/capa-rules/blob/master/doc/format.md#api # https://github.com/mandiant/capa-rules/blob/master/doc/format.md#api
api = kid.value api = kid.value
logger.info("doing api: %r", api) logger.info("doing api: %r", api)
# e.g. kernel32.CreateNamedPipe => look for kernel32.dll and CreateNamedPipe # e.g. kernel32.CreateNamedPipe => look for kernel32.dll and CreateNamedPipe
# TODO: improve .NET API call handling #
# note: the handling of .NET API calls could be improved here.
# once we have a motivation and some examples, let's do that.
if "::" in api: if "::" in api:
mod, api = api.split("::") mod, api = api.split("::")
@@ -204,7 +206,7 @@ def convert_rule(rule, rulename, cround, depth):
var_name = "api_" + var_names.pop(0) var_name = "api_" + var_names.pop(0)
# limit regex with word boundary \b but also search for appended A and W # limit regex with word boundary \b but also search for appended A and W
# TODO: better use something like /(\\x00|\\x01|\\x02|\\x03|\\x04)' + api + '(A|W)?\\x00/ ??? # alternatively: use something like /(\\x00|\\x01|\\x02|\\x03|\\x04)' + api + '(A|W)?\\x00/ ???
yara_strings += "\t$" + var_name + " = /\\b" + api + "(A|W)?\\b/ ascii wide\n" yara_strings += "\t$" + var_name + " = /\\b" + api + "(A|W)?\\b/ ascii wide\n"
yara_condition += "\t$" + var_name + " " yara_condition += "\t$" + var_name + " "
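Concretely, the construction above turns an api feature into a word-bounded YARA regex; for the kernel32.CreateNamedPipe example mentioned a few lines up:

api = "CreateNamedPipe"
var_name = "api_aa"  # illustrative; the script pops generated names from var_names
yara_string = "\t$" + var_name + " = /\\b" + api + "(A|W)?\\b/ ascii wide\n"
print(yara_string)
# 	$api_aa = /\bCreateNamedPipe(A|W)?\b/ ascii wide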
@@ -679,8 +681,6 @@ def convert_rules(rules, namespaces, cround, make_priv):
yara += " condition:" + condition_header + yara_condition + "\n}" yara += " condition:" + condition_header + yara_condition + "\n}"
# TODO: now the rule is finished and could be automatically checked with the capa-testfile(s) named in meta
# (doing it for all of them using yara-ci upload at the moment)
output_yar(yara) output_yar(yara)
converted_rules.append(rule_name) converted_rules.append(rule_name)
count_incomplete += incomplete count_incomplete += incomplete

View File

@@ -142,7 +142,6 @@ def main(argv=None):
if args.function: if args.function:
if args.format == "freeze": if args.format == "freeze":
# TODO fix
function_handles = tuple(filter(lambda fh: fh.address == args.function, function_handles)) function_handles = tuple(filter(lambda fh: fh.address == args.function, function_handles))
else: else:
function_handles = tuple(filter(lambda fh: format_address(fh.address) == args.function, function_handles)) function_handles = tuple(filter(lambda fh: format_address(fh.address) == args.function, function_handles))

View File

@@ -81,6 +81,7 @@ setuptools.setup(
"flake8-logging-format==0.9.0", "flake8-logging-format==0.9.0",
"flake8-no-implicit-concat==0.3.4", "flake8-no-implicit-concat==0.3.4",
"flake8-print==5.0.0", "flake8-print==5.0.0",
"flake8-todos==0.3.0",
"ruff==0.0.275", "ruff==0.0.275",
"black==23.3.0", "black==23.3.0",
"isort==5.11.4", "isort==5.11.4",

View File

@@ -653,7 +653,6 @@ FEATURE_PRESENCE_TESTS = sorted(
# insn/api: call via jmp # insn/api: call via jmp
("mimikatz", "function=0x40B3C6", capa.features.insn.API("LocalFree"), True), ("mimikatz", "function=0x40B3C6", capa.features.insn.API("LocalFree"), True),
("c91887...", "function=0x40156F", capa.features.insn.API("CloseClipboard"), True), ("c91887...", "function=0x40156F", capa.features.insn.API("CloseClipboard"), True),
# TODO ignore thunk functions that call via jmp?
# insn/api: resolve indirect calls # insn/api: resolve indirect calls
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), True), ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), True), ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), True),