mirror of https://github.com/mandiant/capa.git (synced 2025-12-12 15:49:46 -08:00)

introduce flake8-todos linter
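
flake8-todos enforces a consistent style for TODO comments: each one should
name an owner and use a colon, as in "TODO(author): description". A minimal
sketch of the before/after pattern this commit applies throughout (the author
names come from the diff below):

    # flagged by the linter: no author attached
    # TODO fix this

    # accepted style
    # TODO(williballenthin): fix this

TODOs that no longer carry a clear action are reworded into plain notes
(e.g. "fun research:", "further idea:") or dropped rather than tagged.
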
@@ -21,6 +21,7 @@ if TYPE_CHECKING:
import capa.perf
import capa.features
import capa.features.extractors.elf
import capa.features.freeze.features
from capa.features.address import Address

logger = logging.getLogger(__name__)

@@ -127,9 +128,9 @@ class Feature(abc.ABC): # noqa: B024
        return self.name == other.name and self.value == other.value

    def __lt__(self, other):
        # TODO: this is a huge hack!
        import capa.features.freeze.features

        # implementing sorting by serializing to JSON is a huge hack.
        # its slow, inelegant, and probably doesn't work intuitively;
        # however, we only use it for deterministic output, so it's good enough for now.
        return (
            capa.features.freeze.features.feature_from_capa(self).json()
            < capa.features.freeze.features.feature_from_capa(other).json()

@@ -41,7 +41,9 @@ def unmangle_c_name(name: str) -> str:
    # _lstrlenWStub@4

    # A small optimization to avoid running the regex too many times
    # TODO: this still increases the unit test execution time from 170s to 200s, should be able to accelerate it
    # this still increases the unit test execution time from 170s to 200s, should be able to accelerate it
    #
    # TODO(xusheng): performance optimizations to improve test execution time
    if name[0] in ["@", "_"]:
        match = re.match(r"^[@|_](.*?)(Stub)?(@\d+)?$", name)
        if match:

@@ -324,7 +324,7 @@ def extract_insn_offset_features(

def is_nzxor_stack_cookie(f: Function, bb: BinjaBasicBlock, llil: LowLevelILInstruction) -> bool:
    """check if nzxor exists within stack cookie delta"""
    # TODO: we can do a much accurate analysi using LLIL SSA
    # TODO(xusheng): use LLIL SSA to do more accurate analysis

    reg_names = []
    if llil.left.operation == LowLevelILOperation.LLIL_REG:

@@ -36,8 +36,8 @@ def extract_file_import_names(elf, **kwargs):

        for _, symbol in enumerate(section.iter_symbols()):
            if symbol.name and symbol.entry.st_info.type == "STT_FUNC":
                # TODO symbol address
                # TODO symbol version info?
                # TODO(williballenthin): extract symbol address
                # TODO(williballenthin): is it useful to extract the symbol version info?
                yield Import(symbol.name), FileOffsetAddress(0x0)

@@ -68,7 +68,7 @@ def extract_file_format(**kwargs):


def extract_file_arch(elf, **kwargs):
    # TODO merge with capa.features.extractors.elf.detect_elf_arch()
    # TODO(williballenthin): merge with capa.features.extractors.elf.detect_elf_arch()
    arch = elf.get_machine_arch()
    if arch == "x86":
        yield Arch("i386"), NO_ADDRESS

@@ -85,7 +85,7 @@ def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, i


FILE_HANDLERS = (
    # TODO extract_file_export_names,
    # TODO(williballenthin): implement extract_file_export_names
    extract_file_import_names,
    extract_file_section_names,
    extract_file_strings,

@@ -28,7 +28,7 @@ def find_byte_sequence(start: int, end: int, seq: bytes) -> Iterator[int]:
    """
    seqstr = " ".join([f"{b:02x}" for b in seq])
    while True:
        # TODO find_binary: Deprecated. Please use ida_bytes.bin_search() instead.
        # TODO(mike-hunhoff): find_binary is deprecated. Please use ida_bytes.bin_search() instead.
        ea = idaapi.find_binary(start, end, seqstr, 0, idaapi.SEARCH_DOWN)
        if ea == idaapi.BADADDR:
            break

@@ -106,7 +106,8 @@ def get_file_imports() -> Dict[int, Tuple[str, str, int]]:

        # IDA uses section names for the library of ELF imports, like ".dynsym".
        # These are not useful to us, we may need to expand this list over time
        # TODO: exhaust this list, see #1419
        # TODO(williballenthin): find all section names used by IDA
        # https://github.com/mandiant/capa/issues/1419
        if library == ".dynsym":
            library = ""

@@ -405,7 +405,8 @@ def extract_insn_peb_access_characteristic_features(
    disasm = idc.GetDisasm(insn.ea)

    if " fs:30h" in disasm or " gs:60h" in disasm:
        # TODO: replace above with proper IDA
        # TODO(mike-hunhoff): use proper IDA API for fetching segment access
        # scanning the disassembly text is a hack.
        yield Characteristic("peb access"), ih.address

@@ -426,11 +427,13 @@ def extract_insn_segment_access_features(
    disasm = idc.GetDisasm(insn.ea)

    if " fs:" in disasm:
        # TODO: replace above with proper IDA
        # TODO(mike-hunhoff): use proper IDA API for fetching segment access
        # scanning the disassembly text is a hack.
        yield Characteristic("fs access"), ih.address

    if " gs:" in disasm:
        # TODO: replace above with proper IDA
        # TODO(mike-hunhoff): use proper IDA API for fetching segment access
        # scanning the disassembly text is a hack.
        yield Characteristic("gs access"), ih.address

@@ -92,7 +92,6 @@ def is_mov_imm_to_stack(instr: envi.archs.i386.disasm.i386Opcode) -> bool:
    if not src.isImmed():
        return False

    # TODO what about 64-bit operands?
    if not isinstance(dst, envi.archs.i386.disasm.i386SibOper) and not isinstance(
        dst, envi.archs.i386.disasm.i386RegMemOper
    ):

@@ -351,7 +351,6 @@ def is_security_cookie(f, bb, insn) -> bool:
    if oper.isReg() and oper.reg not in [
        envi.archs.i386.regs.REG_ESP,
        envi.archs.i386.regs.REG_EBP,
        # TODO: do x64 support for real.
        envi.archs.amd64.regs.REG_RBP,
        envi.archs.amd64.regs.REG_RSP,
    ]:

@@ -422,7 +421,6 @@ def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> It
    """
    parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64
    """
    # TODO handle where fs/gs are loaded into a register or onto the stack and used later
    insn: envi.Opcode = ih.inner

    if insn.mnem not in ["push", "mov"]:

@@ -646,7 +644,6 @@ def extract_op_offset_features(
        if oper.reg == envi.archs.i386.regs.REG_EBP:
            return

        # TODO: do x64 support for real.
        if oper.reg == envi.archs.amd64.regs.REG_RBP:
            return

@@ -37,13 +37,13 @@ class CapaRuleGenFeatureCacheNode:
        self.children: Set[CapaRuleGenFeatureCacheNode] = set()

    def __hash__(self):
        # TODO: unique enough?
        # TODO(mike-hunhoff): confirm this is unique enough
        return hash((self.address,))

    def __eq__(self, other):
        if not isinstance(other, type(self)):
            return NotImplemented
        # TODO: unique enough?
        # TODO(mike-hunhoff): confirm this is unique enough
        return self.address == other.address

@@ -791,7 +791,7 @@ class CapaExplorerForm(idaapi.PluginForm):
        try:
            # support binary files specifically for x86/AMD64 shellcode
            # warn user binary file is loaded but still allow capa to process it
            # TODO: check specific architecture of binary files based on how user configured IDA processors
            # TODO(mike-hunhoff): check specific architecture of binary files based on how user configured IDA processors
            if idaapi.get_file_type_name() == "Binary file":
                logger.warning("-" * 80)
                logger.warning(" Input file appears to be a binary file.")

@@ -372,7 +372,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
                display += f" ({statement.description})"
            return CapaExplorerDefaultItem(parent, display)
        elif isinstance(statement, rd.CompoundStatement) and statement.type == rd.CompoundStatementType.NOT:
            # TODO: do we display 'not'
            # TODO(mike-hunhoff): verify that we can display NOT statements
            pass
        elif isinstance(statement, rd.SomeStatement):
            display = f"{statement.count} or more"

@@ -421,7 +421,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
        @param doc: result doc
        """
        if not match.success:
            # TODO: display failed branches at some point? Help with debugging rules?
            # TODO(mike-hunhoff): display failed branches at some point? Help with debugging rules?
            return

        # optional statement with no successful children is empty

@@ -502,7 +502,7 @@ class CapaExplorerRulegenEditor(QtWidgets.QTreeWidget):
            feature, description, comment = (o.strip() for o in tuple(o.text(i) for i in range(3)))
            rule_text += parse_node_for_feature(feature, description, comment, calc_item_depth(o))

        # FIXME we avoid circular update by disabling signals when updating
        # TODO(mike-hunhoff): we avoid circular update by disabling signals when updating
        # the preview. Preferably we would refactor the code to avoid this
        # in the first place
        self.preview.blockSignals(True)

@@ -326,10 +326,9 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro
    return matches, meta


# TODO move all to helpers?
def has_rule_with_namespace(rules, capabilities, rule_cat):
def has_rule_with_namespace(rules: RuleSet, capabilities: MatchResults, namespace: str) -> bool:
    for rule_name in capabilities.keys():
        if rules.rules[rule_name].meta.get("namespace", "").startswith(rule_cat):
        if rules.rules[rule_name].meta.get("namespace", "").startswith(namespace):
            return True
    return False

@@ -484,7 +483,6 @@ def get_workspace(path, format_, sigpaths):
    import viv_utils.flirt

    logger.debug("generating vivisect workspace for: %s", path)
    # TODO should not be auto at this point, anymore
    if format_ == FORMAT_AUTO:
        if not is_supported_format(path):
            raise UnsupportedFormatError()

@@ -509,7 +507,6 @@ def get_workspace(path, format_, sigpaths):
    return vw


# TODO get_extractors -> List[FeatureExtractor]?
def get_extractor(
    path: str,
    format_: str,

@@ -1165,8 +1162,8 @@ def main(argv=None):
        rules = rules.filter_rules_by_meta(args.tag)
        logger.debug("selected %d rules", len(rules))
        for i, r in enumerate(rules.rules, 1):
            # TODO don't display subscope rules?
            logger.debug(" %d. %s", i, r)

    except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e:
        logger.error("%s", str(e))
        logger.error(

@@ -22,7 +22,7 @@ def get_node_cost(node):
        # substring and regex features require a full scan of each string
        # which we anticipate is more expensive then a hash lookup feature (e.g. mnemonic or count).
        #
        # TODO: compute the average cost of these feature relative to hash feature
        # fun research: compute the average cost of these feature relative to hash feature
        # and adjust the factor accordingly.
        return 2

@@ -510,7 +510,9 @@ def build_statements(d, scope: str):
            # arg is string (which doesn't support inline descriptions), like:
            #
            # count(string(error))
            # TODO: what about embedded newlines?
            #
            # known problem that embedded newlines may not work here?
            # this may become a problem (or not), so address it when encountered.
            feature = Feature(arg)
        else:
            feature = Feature()

@@ -1190,7 +1192,6 @@ class RuleSet:
                # so thats not helpful to decide how to downselect.
                #
                # and, a global rule will never be the sole selector in a rule.
                # TODO: probably want a lint for this.
                pass
            else:
                # easy feature: hash lookup

@@ -54,7 +54,7 @@ var_names = ["".join(letters) for letters in itertools.product(string.ascii_lowe

# this have to be the internal names used by capa.py which are sometimes different to the ones written out in the rules, e.g. "2 or more" is "Some", count is Range
unsupported = ["characteristic", "mnemonic", "offset", "subscope", "Range"]
# TODO shorten this list, possible stuff:
# further idea: shorten this list, possible stuff:
# - 2 or more strings: e.g.
# -- https://github.com/mandiant/capa-rules/blob/master/collection/file-managers/gather-direct-ftp-information.yml
# -- https://github.com/mandiant/capa-rules/blob/master/collection/browser/gather-firefox-profile-information.yml

@@ -172,14 +172,16 @@ def convert_rule(rule, rulename, cround, depth):
            yara_strings += "\t$" + var_name + ' = "' + string + '" ascii wide' + convert_description(kid) + "\n"
            yara_condition += "\t$" + var_name + " "
        elif s_type == "api" or s_type == "import":
            # TODO: is it possible in YARA to make a difference between api & import?
            # research needed to decide if its possible in YARA to make a difference between api & import?

            # https://github.com/mandiant/capa-rules/blob/master/doc/format.md#api
            api = kid.value
            logger.info("doing api: %r", api)

            # e.g. kernel32.CreateNamedPipe => look for kernel32.dll and CreateNamedPipe
            # TODO: improve .NET API call handling
            #
            # note: the handling of .NET API calls could be improved here.
            # once we have a motivation and some examples, lets do that.
            if "::" in api:
                mod, api = api.split("::")

@@ -204,7 +206,7 @@ def convert_rule(rule, rulename, cround, depth):
            var_name = "api_" + var_names.pop(0)

            # limit regex with word boundary \b but also search for appended A and W
            # TODO: better use something like /(\\x00|\\x01|\\x02|\\x03|\\x04)' + api + '(A|W)?\\x00/ ???
            # alternatively: use something like /(\\x00|\\x01|\\x02|\\x03|\\x04)' + api + '(A|W)?\\x00/ ???
            yara_strings += "\t$" + var_name + " = /\\b" + api + "(A|W)?\\b/ ascii wide\n"
            yara_condition += "\t$" + var_name + " "

@@ -679,8 +681,6 @@ def convert_rules(rules, namespaces, cround, make_priv):

    yara += " condition:" + condition_header + yara_condition + "\n}"

    # TODO: now the rule is finished and could be automatically checked with the capa-testfile(s) named in meta
    # (doing it for all of them using yara-ci upload at the moment)
    output_yar(yara)
    converted_rules.append(rule_name)
    count_incomplete += incomplete

@@ -142,7 +142,6 @@ def main(argv=None):

    if args.function:
        if args.format == "freeze":
            # TODO fix
            function_handles = tuple(filter(lambda fh: fh.address == args.function, function_handles))
        else:
            function_handles = tuple(filter(lambda fh: format_address(fh.address) == args.function, function_handles))

setup.py
@@ -81,6 +81,7 @@ setuptools.setup(
            "flake8-logging-format==0.9.0",
            "flake8-no-implicit-concat==0.3.4",
            "flake8-print==5.0.0",
            "flake8-todos==0.3.0",
            "ruff==0.0.275",
            "black==23.3.0",
            "isort==5.11.4",

@@ -653,7 +653,6 @@ FEATURE_PRESENCE_TESTS = sorted(
        # insn/api: call via jmp
        ("mimikatz", "function=0x40B3C6", capa.features.insn.API("LocalFree"), True),
        ("c91887...", "function=0x40156F", capa.features.insn.API("CloseClipboard"), True),
        # TODO ignore thunk functions that call via jmp?
        # insn/api: resolve indirect calls
        ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.CreatePipe"), True),
        ("c91887...", "function=0x401A77", capa.features.insn.API("kernel32.SetHandleInformation"), True),