introduce flake8-todos linter

This commit is contained in:
Willi Ballenthin
2023-07-09 23:35:52 +02:00
parent 4a49543d12
commit ae10a2ea34
19 changed files with 41 additions and 41 deletions

View File

@@ -21,6 +21,7 @@ if TYPE_CHECKING:
import capa.perf
import capa.features
import capa.features.extractors.elf
import capa.features.freeze.features
from capa.features.address import Address
logger = logging.getLogger(__name__)
@@ -127,9 +128,9 @@ class Feature(abc.ABC): # noqa: B024
return self.name == other.name and self.value == other.value
def __lt__(self, other):
# TODO: this is a huge hack!
import capa.features.freeze.features
# implementing sorting by serializing to JSON is a huge hack.
# its slow, inelegant, and probably doesn't work intuitively;
# however, we only use it for deterministic output, so it's good enough for now.
return (
capa.features.freeze.features.feature_from_capa(self).json()
< capa.features.freeze.features.feature_from_capa(other).json()

View File

@@ -41,7 +41,9 @@ def unmangle_c_name(name: str) -> str:
# _lstrlenWStub@4
# A small optimization to avoid running the regex too many times
# TODO: this still increases the unit test execution time from 170s to 200s, should be able to accelerate it
# this still increases the unit test execution time from 170s to 200s, should be able to accelerate it
#
# TODO(xusheng): performance optimizations to improve test execution time
if name[0] in ["@", "_"]:
match = re.match(r"^[@|_](.*?)(Stub)?(@\d+)?$", name)
if match:

View File

@@ -324,7 +324,7 @@ def extract_insn_offset_features(
def is_nzxor_stack_cookie(f: Function, bb: BinjaBasicBlock, llil: LowLevelILInstruction) -> bool:
"""check if nzxor exists within stack cookie delta"""
# TODO: we can do a much accurate analysi using LLIL SSA
# TODO(xusheng): use LLIL SSA to do more accurate analysis
reg_names = []
if llil.left.operation == LowLevelILOperation.LLIL_REG:

View File

@@ -36,8 +36,8 @@ def extract_file_import_names(elf, **kwargs):
for _, symbol in enumerate(section.iter_symbols()):
if symbol.name and symbol.entry.st_info.type == "STT_FUNC":
# TODO symbol address
# TODO symbol version info?
# TODO(williballenthin): extract symbol address
# TODO(williballenthin): is it useful to extract the symbol version info?
yield Import(symbol.name), FileOffsetAddress(0x0)
@@ -68,7 +68,7 @@ def extract_file_format(**kwargs):
def extract_file_arch(elf, **kwargs):
# TODO merge with capa.features.extractors.elf.detect_elf_arch()
# TODO(williballenthin): merge with capa.features.extractors.elf.detect_elf_arch()
arch = elf.get_machine_arch()
if arch == "x86":
yield Arch("i386"), NO_ADDRESS
@@ -85,7 +85,7 @@ def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, i
FILE_HANDLERS = (
# TODO extract_file_export_names,
# TODO(williballenthin): implement extract_file_export_names
extract_file_import_names,
extract_file_section_names,
extract_file_strings,

View File

@@ -28,7 +28,7 @@ def find_byte_sequence(start: int, end: int, seq: bytes) -> Iterator[int]:
"""
seqstr = " ".join([f"{b:02x}" for b in seq])
while True:
# TODO find_binary: Deprecated. Please use ida_bytes.bin_search() instead.
# TODO(mike-hunhoff): find_binary is deprecated. Please use ida_bytes.bin_search() instead.
ea = idaapi.find_binary(start, end, seqstr, 0, idaapi.SEARCH_DOWN)
if ea == idaapi.BADADDR:
break
@@ -106,7 +106,8 @@ def get_file_imports() -> Dict[int, Tuple[str, str, int]]:
# IDA uses section names for the library of ELF imports, like ".dynsym".
# These are not useful to us, we may need to expand this list over time
# TODO: exhaust this list, see #1419
# TODO(williballenthin): find all section names used by IDA
# https://github.com/mandiant/capa/issues/1419
if library == ".dynsym":
library = ""

View File

@@ -405,7 +405,8 @@ def extract_insn_peb_access_characteristic_features(
disasm = idc.GetDisasm(insn.ea)
if " fs:30h" in disasm or " gs:60h" in disasm:
# TODO: replace above with proper IDA
# TODO(mike-hunhoff): use proper IDA API for fetching segment access
# scanning the disassembly text is a hack.
yield Characteristic("peb access"), ih.address
@@ -426,11 +427,13 @@ def extract_insn_segment_access_features(
disasm = idc.GetDisasm(insn.ea)
if " fs:" in disasm:
# TODO: replace above with proper IDA
# TODO(mike-hunhoff): use proper IDA API for fetching segment access
# scanning the disassembly text is a hack.
yield Characteristic("fs access"), ih.address
if " gs:" in disasm:
# TODO: replace above with proper IDA
# TODO(mike-hunhoff): use proper IDA API for fetching segment access
# scanning the disassembly text is a hack.
yield Characteristic("gs access"), ih.address

View File

@@ -92,7 +92,6 @@ def is_mov_imm_to_stack(instr: envi.archs.i386.disasm.i386Opcode) -> bool:
if not src.isImmed():
return False
# TODO what about 64-bit operands?
if not isinstance(dst, envi.archs.i386.disasm.i386SibOper) and not isinstance(
dst, envi.archs.i386.disasm.i386RegMemOper
):

View File

@@ -351,7 +351,6 @@ def is_security_cookie(f, bb, insn) -> bool:
if oper.isReg() and oper.reg not in [
envi.archs.i386.regs.REG_ESP,
envi.archs.i386.regs.REG_EBP,
# TODO: do x64 support for real.
envi.archs.amd64.regs.REG_RBP,
envi.archs.amd64.regs.REG_RSP,
]:
@@ -422,7 +421,6 @@ def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> It
"""
parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64
"""
# TODO handle where fs/gs are loaded into a register or onto the stack and used later
insn: envi.Opcode = ih.inner
if insn.mnem not in ["push", "mov"]:
@@ -646,7 +644,6 @@ def extract_op_offset_features(
if oper.reg == envi.archs.i386.regs.REG_EBP:
return
# TODO: do x64 support for real.
if oper.reg == envi.archs.amd64.regs.REG_RBP:
return

View File

@@ -37,13 +37,13 @@ class CapaRuleGenFeatureCacheNode:
self.children: Set[CapaRuleGenFeatureCacheNode] = set()
def __hash__(self):
# TODO: unique enough?
# TODO(mike-hunhoff): confirm this is unique enough
return hash((self.address,))
def __eq__(self, other):
if not isinstance(other, type(self)):
return NotImplemented
# TODO: unique enough?
# TODO(mike-hunhoff): confirm this is unique enough
return self.address == other.address

View File

@@ -791,7 +791,7 @@ class CapaExplorerForm(idaapi.PluginForm):
try:
# support binary files specifically for x86/AMD64 shellcode
# warn user binary file is loaded but still allow capa to process it
# TODO: check specific architecture of binary files based on how user configured IDA processors
# TODO(mike-hunhoff): check specific architecture of binary files based on how user configured IDA processors
if idaapi.get_file_type_name() == "Binary file":
logger.warning("-" * 80)
logger.warning(" Input file appears to be a binary file.")

View File

@@ -372,7 +372,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
display += f" ({statement.description})"
return CapaExplorerDefaultItem(parent, display)
elif isinstance(statement, rd.CompoundStatement) and statement.type == rd.CompoundStatementType.NOT:
# TODO: do we display 'not'
# TODO(mike-hunhoff): verify that we can display NOT statements
pass
elif isinstance(statement, rd.SomeStatement):
display = f"{statement.count} or more"
@@ -421,7 +421,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
@param doc: result doc
"""
if not match.success:
# TODO: display failed branches at some point? Help with debugging rules?
# TODO(mike-hunhoff): display failed branches at some point? Help with debugging rules?
return
# optional statement with no successful children is empty

View File

@@ -502,7 +502,7 @@ class CapaExplorerRulegenEditor(QtWidgets.QTreeWidget):
feature, description, comment = (o.strip() for o in tuple(o.text(i) for i in range(3)))
rule_text += parse_node_for_feature(feature, description, comment, calc_item_depth(o))
# FIXME we avoid circular update by disabling signals when updating
# TODO(mike-hunhoff): we avoid circular update by disabling signals when updating
# the preview. Preferably we would refactor the code to avoid this
# in the first place
self.preview.blockSignals(True)

View File

@@ -326,10 +326,9 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro
return matches, meta
# TODO move all to helpers?
def has_rule_with_namespace(rules, capabilities, rule_cat):
def has_rule_with_namespace(rules: RuleSet, capabilities: MatchResults, namespace: str) -> bool:
for rule_name in capabilities.keys():
if rules.rules[rule_name].meta.get("namespace", "").startswith(rule_cat):
if rules.rules[rule_name].meta.get("namespace", "").startswith(namespace):
return True
return False
@@ -484,7 +483,6 @@ def get_workspace(path, format_, sigpaths):
import viv_utils.flirt
logger.debug("generating vivisect workspace for: %s", path)
# TODO should not be auto at this point, anymore
if format_ == FORMAT_AUTO:
if not is_supported_format(path):
raise UnsupportedFormatError()
@@ -509,7 +507,6 @@ def get_workspace(path, format_, sigpaths):
return vw
# TODO get_extractors -> List[FeatureExtractor]?
def get_extractor(
path: str,
format_: str,
@@ -1165,8 +1162,8 @@ def main(argv=None):
rules = rules.filter_rules_by_meta(args.tag)
logger.debug("selected %d rules", len(rules))
for i, r in enumerate(rules.rules, 1):
# TODO don't display subscope rules?
logger.debug(" %d. %s", i, r)
except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
logger.error("%s", str(e))
logger.error(

View File

@@ -22,7 +22,7 @@ def get_node_cost(node):
# substring and regex features require a full scan of each string
# which we anticipate is more expensive then a hash lookup feature (e.g. mnemonic or count).
#
# TODO: compute the average cost of these feature relative to hash feature
# fun research: compute the average cost of these feature relative to hash feature
# and adjust the factor accordingly.
return 2

View File

@@ -510,7 +510,9 @@ def build_statements(d, scope: str):
# arg is string (which doesn't support inline descriptions), like:
#
# count(string(error))
# TODO: what about embedded newlines?
#
# known problem that embedded newlines may not work here?
# this may become a problem (or not), so address it when encountered.
feature = Feature(arg)
else:
feature = Feature()
@@ -1190,7 +1192,6 @@ class RuleSet:
# so thats not helpful to decide how to downselect.
#
# and, a global rule will never be the sole selector in a rule.
# TODO: probably want a lint for this.
pass
else:
# easy feature: hash lookup

View File

@@ -54,7 +54,7 @@ var_names = ["".join(letters) for letters in itertools.product(string.ascii_lowe
# this have to be the internal names used by capa.py which are sometimes different to the ones written out in the rules, e.g. "2 or more" is "Some", count is Range
unsupported = ["characteristic", "mnemonic", "offset", "subscope", "Range"]
# TODO shorten this list, possible stuff:
# further idea: shorten this list, possible stuff:
# - 2 or more strings: e.g.
# -- https://github.com/mandiant/capa-rules/blob/master/collection/file-managers/gather-direct-ftp-information.yml
# -- https://github.com/mandiant/capa-rules/blob/master/collection/browser/gather-firefox-profile-information.yml
@@ -172,14 +172,16 @@ def convert_rule(rule, rulename, cround, depth):
yara_strings += "\t$" + var_name + ' = "' + string + '" ascii wide' + convert_description(kid) + "\n"
yara_condition += "\t$" + var_name + " "
elif s_type == "api" or s_type == "import":
# TODO: is it possible in YARA to make a difference between api & import?
# research needed to decide if its possible in YARA to make a difference between api & import?
# https://github.com/mandiant/capa-rules/blob/master/doc/format.md#api
api = kid.value
logger.info("doing api: %r", api)
# e.g. kernel32.CreateNamedPipe => look for kernel32.dll and CreateNamedPipe
# TODO: improve .NET API call handling
#
# note: the handling of .NET API calls could be improved here.
# once we have a motivation and some examples, lets do that.
if "::" in api:
mod, api = api.split("::")
@@ -204,7 +206,7 @@ def convert_rule(rule, rulename, cround, depth):
var_name = "api_" + var_names.pop(0)
# limit regex with word boundary \b but also search for appended A and W
# TODO: better use something like /(\\x00|\\x01|\\x02|\\x03|\\x04)' + api + '(A|W)?\\x00/ ???
# alternatively: use something like /(\\x00|\\x01|\\x02|\\x03|\\x04)' + api + '(A|W)?\\x00/ ???
yara_strings += "\t$" + var_name + " = /\\b" + api + "(A|W)?\\b/ ascii wide\n"
yara_condition += "\t$" + var_name + " "
@@ -679,8 +681,6 @@ def convert_rules(rules, namespaces, cround, make_priv):
yara += " condition:" + condition_header + yara_condition + "\n}"
# TODO: now the rule is finished and could be automatically checked with the capa-testfile(s) named in meta
# (doing it for all of them using yara-ci upload at the moment)
output_yar(yara)
converted_rules.append(rule_name)
count_incomplete += incomplete

View File

@@ -142,7 +142,6 @@ def main(argv=None):
if args.function:
if args.format == "freeze":
# TODO fix
function_handles = tuple(filter(lambda fh: fh.address == args.function, function_handles))
else:
function_handles = tuple(filter(lambda fh: format_address(fh.address) == args.function, function_handles))

View File

@@ -81,6 +81,7 @@ setuptools.setup(
"flake8-logging-format==0.9.0",
"flake8-no-implicit-concat==0.3.4",
"flake8-print==5.0.0",
"flake8-todos==0.3.0",
"ruff==0.0.275",
"black==23.3.0",
"isort==5.11.4",

View File

@@ -653,7 +653,6 @@ FEATURE_PRESENCE_TESTS = sorted(
# insn/api: call via jmp
("mimikatz", "function=0x40B3C6", capa.features.insn.API("LocalFree"), True),
("c91887...", "function=0x40156F", capa.features.insn.API("CloseClipboard"), True),
# TODO ignore thunk functions that call via jmp?
# insn/api: resolve indirect calls
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), True),
("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), True),