mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 23:59:48 -08:00
Compare commits
273 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ead8a836be | ||
|
|
d471e66073 | ||
|
|
4ddef1f60b | ||
|
|
7b9da896e8 | ||
|
|
41786f4ab8 | ||
|
|
4661da729f | ||
|
|
97dc40a585 | ||
|
|
f2082f3f52 | ||
|
|
f87c8ced3f | ||
|
|
f914eea8ae | ||
|
|
b41d239301 | ||
|
|
8bb1a1cb5a | ||
|
|
2f61bc0b05 | ||
|
|
d22557947a | ||
|
|
3e44d07541 | ||
|
|
f56b27e1c7 | ||
|
|
12075df3ba | ||
|
|
a8bb9620e2 | ||
|
|
9ed4e21429 | ||
|
|
5b293d675f | ||
|
|
5972d6576d | ||
|
|
19ce514b5c | ||
|
|
144ed80c56 | ||
|
|
4d34e56589 | ||
|
|
9045770192 | ||
|
|
4ea21d2a9c | ||
|
|
774a188d19 | ||
|
|
bd5c125561 | ||
|
|
420feea0aa | ||
|
|
b298f547f9 | ||
|
|
a7fe76c336 | ||
|
|
9f777ba152 | ||
|
|
cc3b56ddcb | ||
|
|
0c42942a88 | ||
|
|
0803c6f3fa | ||
|
|
02d9d37c1e | ||
|
|
c121e9219c | ||
|
|
297d9aaa32 | ||
|
|
11644cbc31 | ||
|
|
4c6be15edc | ||
|
|
e1028e4dd8 | ||
|
|
861ff1c91f | ||
|
|
80bb0b4aff | ||
|
|
06d238a9f9 | ||
|
|
71ce28d9e6 | ||
|
|
c48429e5c3 | ||
|
|
34e3f7bbaf | ||
|
|
db624460bc | ||
|
|
16c12f816b | ||
|
|
ea6fed56a2 | ||
|
|
22f11f1a97 | ||
|
|
7c21ccb8f9 | ||
|
|
8f86b0eac2 | ||
|
|
9c8fa32e5c | ||
|
|
9d348c6da2 | ||
|
|
4dc87240f9 | ||
|
|
a60d11a763 | ||
|
|
391cc77996 | ||
|
|
7a3287fa25 | ||
|
|
32244b2641 | ||
|
|
122fdc69e3 | ||
|
|
39e4e47763 | ||
|
|
2ea4dc9d7e | ||
|
|
b2590e7c9a | ||
|
|
af6fe6baa0 | ||
|
|
ce799dadbe | ||
|
|
217e6f88d9 | ||
|
|
a363baffce | ||
|
|
bbe47d81e9 | ||
|
|
a105b41647 | ||
|
|
fc8919adce | ||
|
|
f21877ae27 | ||
|
|
99e7967e22 | ||
|
|
766fe9d845 | ||
|
|
2c60faee26 | ||
|
|
097f1d4695 | ||
|
|
a6efc3952f | ||
|
|
dadd76bd62 | ||
|
|
282c0c2655 | ||
|
|
14f2391f49 | ||
|
|
b5860190e3 | ||
|
|
d8ecb88867 | ||
|
|
f5b2efdc87 | ||
|
|
fab26180cb | ||
|
|
3968d40bf4 | ||
|
|
cb2d1cde36 | ||
|
|
da7a9b7232 | ||
|
|
4f15225665 | ||
|
|
90708c123b | ||
|
|
384f467d4a | ||
|
|
37064f20d1 | ||
|
|
9e579f9de3 | ||
|
|
b2c688ef14 | ||
|
|
9717acd988 | ||
|
|
d06c5b12c2 | ||
|
|
e97a120602 | ||
|
|
5b806b08dd | ||
|
|
fd5dfcc6d8 | ||
|
|
3979317b10 | ||
|
|
8d2595a6db | ||
|
|
3c2c452501 | ||
|
|
af48f86e55 | ||
|
|
73957ea14e | ||
|
|
bb824e9167 | ||
|
|
b996e77606 | ||
|
|
9a20bbd4e1 | ||
|
|
8195b7565f | ||
|
|
0569f9b242 | ||
|
|
8ffa8ea2c8 | ||
|
|
fd7cff6109 | ||
|
|
a3b292066a | ||
|
|
8f6d38468e | ||
|
|
4af5cc66ba | ||
|
|
33c3c7e106 | ||
|
|
5c75f12b78 | ||
|
|
1ae6638861 | ||
|
|
d8999471c5 | ||
|
|
90c0de1a7f | ||
|
|
d13ea1cbbe | ||
|
|
03cf28fccd | ||
|
|
8e757d2099 | ||
|
|
2989732637 | ||
|
|
db45068357 | ||
|
|
735aea86e0 | ||
|
|
d8c8c6d2f3 | ||
|
|
3b4cb47597 | ||
|
|
f55e758d47 | ||
|
|
c5a5e5600a | ||
|
|
6989e8b8cf | ||
|
|
7d2e550b84 | ||
|
|
7f17c45b69 | ||
|
|
b0c86ab8db | ||
|
|
4c0c2c75c6 | ||
|
|
1549b9b506 | ||
|
|
057eeb3629 | ||
|
|
0dea4e8b7d | ||
|
|
d3573a565c | ||
|
|
1275b49ebb | ||
|
|
56f9e16a8b | ||
|
|
a4b0954532 | ||
|
|
fc73787849 | ||
|
|
30a5493414 | ||
|
|
a729bdfbe6 | ||
|
|
dab88e482d | ||
|
|
6482f67a0c | ||
|
|
a1bf95ec2c | ||
|
|
6961fde327 | ||
|
|
c0fe0420fc | ||
|
|
2ba000a987 | ||
|
|
a90e93e150 | ||
|
|
b6ab12d3c1 | ||
|
|
71ccd87435 | ||
|
|
d07045f134 | ||
|
|
bede4a0aa1 | ||
|
|
de1cff356a | ||
|
|
1bee098fb6 | ||
|
|
e36e175e08 | ||
|
|
9db45d2fcb | ||
|
|
558f5d0c8a | ||
|
|
e32a887091 | ||
|
|
1b9a6c3c59 | ||
|
|
aef03b5592 | ||
|
|
3eaeb533e9 | ||
|
|
04cc94a450 | ||
|
|
dae7be076d | ||
|
|
3cb7573edb | ||
|
|
a96a5de12d | ||
|
|
45b6c8dad3 | ||
|
|
cf17ebac33 | ||
|
|
f0a34fdb5e | ||
|
|
e124115e8d | ||
|
|
249b8498d9 | ||
|
|
15c69e3b7d | ||
|
|
98208b8eec | ||
|
|
0690e73320 | ||
|
|
766ac7e500 | ||
|
|
51ac57c657 | ||
|
|
89603586da | ||
|
|
a35f5a1650 | ||
|
|
f1df29d27e | ||
|
|
08c24e2705 | ||
|
|
b1171864e3 | ||
|
|
5af59cecda | ||
|
|
0c3a38b24b | ||
|
|
ac5d163aa0 | ||
|
|
dfe2dbea6d | ||
|
|
909ffc187b | ||
|
|
92dfa99059 | ||
|
|
0065876702 | ||
|
|
23bf28702f | ||
|
|
066873bd06 | ||
|
|
98c00bd8b1 | ||
|
|
fd47b03fac | ||
|
|
8e689c39f4 | ||
|
|
738fa9150e | ||
|
|
5405e182c3 | ||
|
|
ab1326f858 | ||
|
|
f013815b2a | ||
|
|
5b24fc2543 | ||
|
|
b103e40ba8 | ||
|
|
d5c9a5cf3c | ||
|
|
30d7425b98 | ||
|
|
34819b289d | ||
|
|
71d9ebd859 | ||
|
|
c1910d47f0 | ||
|
|
769d354792 | ||
|
|
a7678e779e | ||
|
|
294f74b209 | ||
|
|
fa8b4a4203 | ||
|
|
7205862dbf | ||
|
|
37bc47c772 | ||
|
|
baaa8ba2c1 | ||
|
|
05f8e2445a | ||
|
|
753b003107 | ||
|
|
97092c91db | ||
|
|
20859d2796 | ||
|
|
06f8943bc4 | ||
|
|
e797a67e97 | ||
|
|
a1eca58d7a | ||
|
|
aefe97e09e | ||
|
|
59ae901f57 | ||
|
|
811f484d3b | ||
|
|
ff08b99190 | ||
|
|
44dc4efe57 | ||
|
|
f7e2ac83f2 | ||
|
|
7e60162d65 | ||
|
|
cd06ee4544 | ||
|
|
6d0a777de6 | ||
|
|
dd7a48a00c | ||
|
|
582dcef097 | ||
|
|
b9501d7b77 | ||
|
|
a523fcf804 | ||
|
|
cd07745af1 | ||
|
|
6c15881bfe | ||
|
|
7ff358ee00 | ||
|
|
79e5fad326 | ||
|
|
93f5e966b2 | ||
|
|
d0e9c004a0 | ||
|
|
4814a47560 | ||
|
|
3c81d91072 | ||
|
|
de21f9a1f9 | ||
|
|
9f4dab89a5 | ||
|
|
9def3df16f | ||
|
|
44dd56e344 | ||
|
|
e630bd06db | ||
|
|
1fbd4937bc | ||
|
|
cc54bdddc6 | ||
|
|
f750455519 | ||
|
|
3d383bcc57 | ||
|
|
cdab6eaa5d | ||
|
|
7937cb6ea3 | ||
|
|
57f5236c9b | ||
|
|
f7bdd0e7f6 | ||
|
|
a108e385fe | ||
|
|
6549c9878b | ||
|
|
a3a760e1e6 | ||
|
|
576b9be78c | ||
|
|
528548eb8c | ||
|
|
9a2415e34e | ||
|
|
c9b7162a5f | ||
|
|
7fd9ab5e88 | ||
|
|
b44edbd90e | ||
|
|
a1b3703a0d | ||
|
|
874dffc13f | ||
|
|
8b572dc63f | ||
|
|
659b29a62d | ||
|
|
7a558898e1 | ||
|
|
7dee553558 | ||
|
|
9f6f18466a | ||
|
|
ef003366da | ||
|
|
aaaadc2a47 | ||
|
|
f94287c9ae | ||
|
|
c56bfdca67 |
11
.github/mypy/mypy.ini
vendored
11
.github/mypy/mypy.ini
vendored
@@ -57,6 +57,9 @@ ignore_missing_imports = True
|
||||
[mypy-ida_funcs.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-ida_loader.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-PyQt5.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
@@ -64,4 +67,10 @@ ignore_missing_imports = True
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-pytest.*]
|
||||
ignore_missing_imports = True
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-devtools.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-elftools.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
2
.github/pyinstaller/hooks/hook-vivisect.py
vendored
2
.github/pyinstaller/hooks/hook-vivisect.py
vendored
@@ -45,8 +45,8 @@ hiddenimports = [
|
||||
"vivisect.analysis.crypto",
|
||||
"vivisect.analysis.crypto.constants",
|
||||
"vivisect.analysis.elf",
|
||||
"vivisect.analysis.elf",
|
||||
"vivisect.analysis.elf.elfplt",
|
||||
"vivisect.analysis.elf.elfplt_late",
|
||||
"vivisect.analysis.elf.libc_start_main",
|
||||
"vivisect.analysis.generic",
|
||||
"vivisect.analysis.generic",
|
||||
|
||||
10
.github/workflows/build.yml
vendored
10
.github/workflows/build.yml
vendored
@@ -13,7 +13,7 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-16.04
|
||||
- os: ubuntu-18.04
|
||||
# use old linux so that the shared library versioning is more portable
|
||||
artifact_name: capa
|
||||
asset_name: linux
|
||||
@@ -33,7 +33,7 @@ jobs:
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.8
|
||||
- if: matrix.os == 'ubuntu-16.04'
|
||||
- if: matrix.os == 'ubuntu-18.04'
|
||||
run: sudo apt-get install -y libyaml-dev
|
||||
- name: Install PyInstaller
|
||||
run: pip install 'pyinstaller==4.2'
|
||||
@@ -41,8 +41,12 @@ jobs:
|
||||
run: pip install -e .
|
||||
- name: Build standalone executable
|
||||
run: pyinstaller .github/pyinstaller/pyinstaller.spec
|
||||
- name: Does it run?
|
||||
- name: Does it run (PE)?
|
||||
run: dist/capa "tests/data/Practical Malware Analysis Lab 01-01.dll_"
|
||||
- name: Does it run (Shellcode)?
|
||||
run: dist/capa "tests/data/499c2a85f6e8142c3f48d4251c9c7cd6.raw32"
|
||||
- name: Does it run (ELF)?
|
||||
run: dist/capa "tests/data/7351f8a40c5450557b24622417fc478d.elf_"
|
||||
- uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ matrix.asset_name }}
|
||||
|
||||
156
CHANGELOG.md
156
CHANGELOG.md
@@ -1,5 +1,6 @@
|
||||
# Change Log
|
||||
|
||||
|
||||
## master (unreleased)
|
||||
|
||||
### New Features
|
||||
@@ -8,7 +9,7 @@
|
||||
|
||||
### New Rules (0)
|
||||
|
||||
-
|
||||
-
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
@@ -17,8 +18,157 @@
|
||||
### Development
|
||||
|
||||
### Raw diffs
|
||||
- [capa v2.0.0...master](https://github.com/fireeye/capa/compare/v2.0.0...master)
|
||||
- [capa-rules v2.0.0...master](https://github.com/fireeye/capa-rules/compare/v2.0.0...master)
|
||||
- [capa <release>...master](https://github.com/fireeye/capa/compare/v3.0.2...master)
|
||||
- [capa-rules <release>...master](https://github.com/fireeye/capa-rules/compare/v3.0.2...master)
|
||||
|
||||
## v3.0.2
|
||||
|
||||
This release fixes an issue with the standalone executables built with PyInstaller when running capa against ELF files.
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
- fix bug in PyInstaller config preventing ELF analysis #795 @mr-tz
|
||||
|
||||
### Raw diffs
|
||||
- [capa v3.0.1...v3.0.2](https://github.com/fireeye/capa/compare/v3.0.1...v3.0.2)
|
||||
- [capa-rules v3.0.1...v3.0.2](https://github.com/fireeye/capa-rules/compare/v3.0.1...v3.0.2)
|
||||
|
||||
## v3.0.1 (2021-09-27)
|
||||
|
||||
This version updates the version of vivisect used by capa. Users will experience fewer bugs and find improved analysis results.
|
||||
|
||||
Thanks to the community for highlighting issues and analysis misses. Your feedback is crucial to further improve capa.
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
- fix many underlying bugs in vivisect analysis and update to version v1.0.5 #786 @williballenthin
|
||||
|
||||
### Raw diffs
|
||||
- [capa v3.0.0...v3.0.1](https://github.com/fireeye/capa/compare/v3.0.0...v3.0.1)
|
||||
- [capa-rules v3.0.0...v3.0.1](https://github.com/fireeye/capa-rules/compare/v3.0.0...v3.0.1)
|
||||
|
||||
## v3.0.0 (2021-09-15)
|
||||
|
||||
We are excited to announce version 3.0! :tada:
|
||||
|
||||
capa 3.0:
|
||||
- adds support for ELF files targeting Linux thanks to [Intezer](https://www.intezer.com/)
|
||||
- adds new features to specify OS, CPU architecture, and file format
|
||||
- fixes a few bugs that may have led to false negatives (missed capabilities) in older versions
|
||||
- adds 80 new rules, including 36 describing techniques for Linux
|
||||
|
||||
A huge thanks to everyone who submitted issues, provided feedback, and contributed code and rules.
|
||||
Special acknowledgement to @Adir-Shemesh and @TcM1911 of [Intezer](https://www.intezer.com/) for contributing the code to enable ELF support.
|
||||
Also, welcome first time contributors:
|
||||
- @jaredscottwilson
|
||||
- @cdong1012
|
||||
- @jlepore-fe
|
||||
|
||||
### New Features
|
||||
|
||||
- all: add support for ELF files #700 @Adir-Shemesh @TcM1911
|
||||
- rule format: add feature `format: ` for file format, like `format: pe` #723 @williballenthin
|
||||
- rule format: add feature `arch: ` for architecture, like `arch: amd64` #723 @williballenthin
|
||||
- rule format: add feature `os: ` for operating system, like `os: windows` #723 @williballenthin
|
||||
- rule format: add feature `substring: ` for verbatim strings with leading/trailing wildcards #737 @williballenthin
|
||||
- scripts: add `profile-memory.py` for profiling memory usage #736 @williballenthin
|
||||
- main: add lightweight ELF file feature extractor to detect file limitations #770 @mr-tz
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
- rules using `format`, `arch`, `os`, or `substring` features cannot be used by capa versions prior to v3
|
||||
- legacy term `arch` (i.e., "x32") is now called `bitness` @williballenthin
|
||||
- freeze format gains new section for "global" features #759 @williballenthin
|
||||
|
||||
### New Rules (80)
|
||||
|
||||
- collection/webcam/capture-webcam-image @johnk3r
|
||||
- nursery/list-drag-and-drop-files michael.hunhoff@fireeye.com
|
||||
- nursery/monitor-clipboard-content michael.hunhoff@fireeye.com
|
||||
- nursery/monitor-local-ipv4-address-changes michael.hunhoff@fireeye.com
|
||||
- nursery/load-windows-common-language-runtime michael.hunhoff@fireeye.com
|
||||
- nursery/resize-volume-shadow-copy-storage michael.hunhoff@fireeye.com
|
||||
- nursery/add-user-account-group michael.hunhoff@fireeye.com
|
||||
- nursery/add-user-account-to-group michael.hunhoff@fireeye.com
|
||||
- nursery/add-user-account michael.hunhoff@fireeye.com
|
||||
- nursery/change-user-account-password michael.hunhoff@fireeye.com
|
||||
- nursery/delete-user-account-from-group michael.hunhoff@fireeye.com
|
||||
- nursery/delete-user-account-group michael.hunhoff@fireeye.com
|
||||
- nursery/delete-user-account michael.hunhoff@fireeye.com
|
||||
- nursery/list-domain-servers michael.hunhoff@fireeye.com
|
||||
- nursery/list-groups-for-user-account michael.hunhoff@fireeye.com
|
||||
- nursery/list-user-account-groups michael.hunhoff@fireeye.com
|
||||
- nursery/list-user-accounts-for-group michael.hunhoff@fireeye.com
|
||||
- nursery/list-user-accounts michael.hunhoff@fireeye.com
|
||||
- nursery/parse-url michael.hunhoff@fireeye.com
|
||||
- nursery/register-raw-input-devices michael.hunhoff@fireeye.com
|
||||
- anti-analysis/packer/gopacker/packed-with-gopacker jared.wilson@fireeye.com
|
||||
- host-interaction/driver/create-device-object @mr-tz
|
||||
- host-interaction/process/create/execute-command @mr-tz
|
||||
- data-manipulation/encryption/create-new-key-via-cryptacquirecontext chuong.dong@fireeye.com
|
||||
- host-interaction/log/clfs/append-data-to-clfs-log-container blaine.stancill@mandiant.com
|
||||
- host-interaction/log/clfs/read-data-from-clfs-log-container blaine.stancill@mandiant.com
|
||||
- data-manipulation/encryption/hc-128/encrypt-data-using-hc-128-via-wolfssl blaine.stancill@mandiant.com
|
||||
- c2/shell/create-unix-reverse-shell joakim@intezer.com
|
||||
- c2/shell/execute-shell-command-received-from-socket joakim@intezer.com
|
||||
- collection/get-current-user joakim@intezer.com
|
||||
- host-interaction/file-system/change-file-permission joakim@intezer.com
|
||||
- host-interaction/hardware/memory/get-memory-information joakim@intezer.com
|
||||
- host-interaction/mutex/lock-file joakim@intezer.com
|
||||
- host-interaction/os/version/get-kernel-version joakim@intezer.com
|
||||
- host-interaction/os/version/get-linux-distribution joakim@intezer.com
|
||||
- host-interaction/process/terminate/terminate-process-via-kill joakim@intezer.com
|
||||
- lib/duplicate-stdin-and-stdout joakim@intezer.com
|
||||
- nursery/capture-network-configuration-via-ifconfig joakim@intezer.com
|
||||
- nursery/collect-ssh-keys joakim@intezer.com
|
||||
- nursery/enumerate-processes-via-procfs joakim@intezer.com
|
||||
- nursery/interact-with-iptables joakim@intezer.com
|
||||
- persistence/persist-via-desktop-autostart joakim@intezer.com
|
||||
- persistence/persist-via-shell-profile-or-rc-file joakim@intezer.com
|
||||
- persistence/service/persist-via-rc-script joakim@intezer.com
|
||||
- collection/get-current-user-on-linux joakim@intezer.com
|
||||
- collection/network/get-mac-address-on-windows moritz.raabe@fireeye.com
|
||||
- host-interaction/file-system/read/read-file-on-linux moritz.raabe@fireeye.com joakim@intezer.com
|
||||
- host-interaction/file-system/read/read-file-on-windows moritz.raabe@fireeye.com
|
||||
- host-interaction/file-system/write/write-file-on-windows william.ballenthin@fireeye.com
|
||||
- host-interaction/os/info/get-system-information-on-windows moritz.raabe@fireeye.com joakim@intezer.com
|
||||
- host-interaction/process/create/create-process-on-windows moritz.raabe@fireeye.com
|
||||
- linking/runtime-linking/link-function-at-runtime-on-windows moritz.raabe@fireeye.com
|
||||
- nursery/create-process-on-linux joakim@intezer.com
|
||||
- nursery/enumerate-files-on-linux william.ballenthin@fireeye.com
|
||||
- nursery/get-mac-address-on-linux joakim@intezer.com
|
||||
- nursery/get-system-information-on-linux joakim@intezer.com
|
||||
- nursery/link-function-at-runtime-on-linux joakim@intezer.com
|
||||
- nursery/write-file-on-linux joakim@intezer.com
|
||||
- communication/socket/tcp/send/obtain-transmitpackets-callback-function-via-wsaioctl jonathan.lepore@mandiant.com
|
||||
- nursery/linked-against-cpp-http-library @mr-tz
|
||||
- nursery/linked-against-cpp-json-library @mr-tz
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
- main: fix `KeyError: 0` when reporting results @williballenthin #703
|
||||
- main: fix potential false negatives due to namespaces across scopes @williballenthin #721
|
||||
- linter: suppress some warnings about imports from ntdll/ntoskrnl @williballenthin #743
|
||||
- linter: suppress some warnings about missing examples in the nursery @williballenthin #747
|
||||
|
||||
### capa explorer IDA Pro plugin
|
||||
|
||||
- explorer: add additional filter logic when displaying matches by function #686 @mike-hunhoff
|
||||
- explorer: remove duplicate check when saving file #687 @mike-hunhoff
|
||||
- explorer: update IDA extractor to use non-canon mnemonics #688 @mike-hunhoff
|
||||
- explorer: allow user to add specified number of bytes when adding a Bytes feature in the Rule Generator #689 @mike-hunhoff
|
||||
- explorer: enforce max column width for Features and Editor panes #691 @mike-hunhoff
|
||||
- explorer: add option to limit features to currently selected disassembly address #692 @mike-hunhoff
|
||||
- explorer: update support documentation and runtime checks #741 @mike-hunhoff
|
||||
- explorer: small performance boost to rule generator search functionality #742 @mike-hunhoff
|
||||
- explorer: add support for arch, os, and format features #758 @mike-hunhoff
|
||||
- explorer: improve parsing algorithm for rule generator feature editor #768 @mike-hunhoff
|
||||
|
||||
### Development
|
||||
|
||||
### Raw diffs
|
||||
- [capa v2.0.0...v3.0.0](https://github.com/fireeye/capa/compare/v2.0.0...v3.0.0)
|
||||
- [capa-rules v2.0.0...v3.0.0](https://github.com/fireeye/capa-rules/compare/v2.0.0...v3.0.0)
|
||||
|
||||
|
||||
## v2.0.0 (2021-07-19)
|
||||
|
||||
@@ -2,16 +2,19 @@
|
||||
|
||||
[](https://pypi.org/project/flare-capa)
|
||||
[](https://github.com/fireeye/capa/releases)
|
||||
[](https://github.com/fireeye/capa-rules)
|
||||
[](https://github.com/fireeye/capa-rules)
|
||||
[](https://github.com/fireeye/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
|
||||
[](https://github.com/fireeye/capa/releases)
|
||||
[](LICENSE.txt)
|
||||
|
||||
capa detects capabilities in executable files.
|
||||
You run it against a PE file or shellcode and it tells you what it thinks the program can do.
|
||||
You run it against a PE, ELF, or shellcode file and it tells you what it thinks the program can do.
|
||||
For example, it might suggest that the file is a backdoor, is capable of installing services, or relies on HTTP to communicate.
|
||||
|
||||
Check out the overview in our first [capa blog post](https://www.fireeye.com/blog/threat-research/2020/07/capa-automatically-identify-malware-capabilities.html).
|
||||
Check out:
|
||||
- the overview in our first [capa blog post](https://www.fireeye.com/blog/threat-research/2020/07/capa-automatically-identify-malware-capabilities.html)
|
||||
- the major version 2.0 updates described in our [second blog post](https://www.fireeye.com/blog/threat-research/2021/07/capa-2-better-stronger-faster.html)
|
||||
- the major version 3.0 (ELF support) described in the [third blog post](https://www.fireeye.com/blog/threat-research/2021/09/elfant-in-the-room-capa-v3.html)
|
||||
|
||||
```
|
||||
$ capa.exe suspicious.exe
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
import copy
|
||||
import collections
|
||||
from typing import Set, Dict, List, Tuple, Union, Mapping
|
||||
from typing import Set, Dict, List, Tuple, Union, Mapping, Iterable
|
||||
|
||||
import capa.rules
|
||||
import capa.features.common
|
||||
@@ -228,6 +228,23 @@ class Subscope(Statement):
|
||||
MatchResults = Mapping[str, List[Tuple[int, Result]]]
|
||||
|
||||
|
||||
def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations: Iterable[int]):
|
||||
"""
|
||||
record into the given featureset that the given rule matched at the given locations.
|
||||
|
||||
naively, this is just adding a MatchedRule feature;
|
||||
however, we also want to record matches for the rule's namespaces.
|
||||
|
||||
updates `features` in-place. doesn't modify the remaining arguments.
|
||||
"""
|
||||
features[capa.features.common.MatchedRule(rule.name)].update(locations)
|
||||
namespace = rule.meta.get("namespace")
|
||||
if namespace:
|
||||
while namespace:
|
||||
features[capa.features.common.MatchedRule(namespace)].update(locations)
|
||||
namespace, _, _ = namespace.rpartition("/")
|
||||
|
||||
|
||||
def match(rules: List["capa.rules.Rule"], features: FeatureSet, va: int) -> Tuple[FeatureSet, MatchResults]:
|
||||
"""
|
||||
Args:
|
||||
@@ -237,7 +254,7 @@ def match(rules: List["capa.rules.Rule"], features: FeatureSet, va: int) -> Tupl
|
||||
|
||||
Returns:
|
||||
Tuple[FeatureSet, MatchResults]: two-tuple with entries:
|
||||
- set of features used for matching (which may be greater than argument, due to rule match features), and
|
||||
- set of features used for matching (which may be a superset of the given `features` argument, due to rule match features), and
|
||||
- mapping from rule name to [(location of match, result object)]
|
||||
"""
|
||||
results = collections.defaultdict(list) # type: MatchResults
|
||||
@@ -252,12 +269,9 @@ def match(rules: List["capa.rules.Rule"], features: FeatureSet, va: int) -> Tupl
|
||||
res = rule.evaluate(features)
|
||||
if res:
|
||||
results[rule.name].append((va, res))
|
||||
features[capa.features.common.MatchedRule(rule.name)].add(va)
|
||||
|
||||
namespace = rule.meta.get("namespace")
|
||||
if namespace:
|
||||
while namespace:
|
||||
features[capa.features.common.MatchedRule(namespace)].add(va)
|
||||
namespace, _, _ = namespace.rpartition("/")
|
||||
# we need to update the current `features`
|
||||
# because subsequent iterations of this loop may use newly added features,
|
||||
# such as rule or namespace matches.
|
||||
index_rule_matches(features, rule, [va])
|
||||
|
||||
return (features, results)
|
||||
|
||||
@@ -14,6 +14,7 @@ from typing import Set, Dict, Union
|
||||
|
||||
import capa.engine
|
||||
import capa.features
|
||||
import capa.features.extractors.elf
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
MAX_BYTES_FEATURE_SIZE = 0x100
|
||||
@@ -21,12 +22,6 @@ MAX_BYTES_FEATURE_SIZE = 0x100
|
||||
# thunks may be chained so we specify a delta to control the depth to which these chains are explored
|
||||
THUNK_CHAIN_DEPTH_DELTA = 5
|
||||
|
||||
# identifiers for supported architectures names that tweak a feature
|
||||
# for example, offset/x32
|
||||
ARCH_X32 = "x32"
|
||||
ARCH_X64 = "x64"
|
||||
VALID_ARCH = (ARCH_X32, ARCH_X64)
|
||||
|
||||
|
||||
def bytes_to_str(b: bytes) -> str:
|
||||
return str(codecs.encode(b, "hex").decode("utf-8"))
|
||||
@@ -52,33 +47,33 @@ def escape_string(s: str) -> str:
|
||||
|
||||
|
||||
class Feature:
|
||||
def __init__(self, value: Union[str, int, bytes], arch=None, description=None):
|
||||
def __init__(self, value: Union[str, int, bytes], bitness=None, description=None):
|
||||
"""
|
||||
Args:
|
||||
value (any): the value of the feature, such as the number or string.
|
||||
arch (str): one of the VALID_ARCH values, or None.
|
||||
When None, then the feature applies to any architecture.
|
||||
Modifies the feature name from `feature` to `feature/arch`, like `offset/x32`.
|
||||
bitness (str): one of the VALID_BITNESS values, or None.
|
||||
When None, then the feature applies to any bitness.
|
||||
Modifies the feature name from `feature` to `feature/bitness`, like `offset/x32`.
|
||||
description (str): a human-readable description that explains the feature value.
|
||||
"""
|
||||
super(Feature, self).__init__()
|
||||
|
||||
if arch is not None:
|
||||
if arch not in VALID_ARCH:
|
||||
raise ValueError("arch '%s' must be one of %s" % (arch, VALID_ARCH))
|
||||
self.name = self.__class__.__name__.lower() + "/" + arch
|
||||
if bitness is not None:
|
||||
if bitness not in VALID_BITNESS:
|
||||
raise ValueError("bitness '%s' must be one of %s" % (bitness, VALID_BITNESS))
|
||||
self.name = self.__class__.__name__.lower() + "/" + bitness
|
||||
else:
|
||||
self.name = self.__class__.__name__.lower()
|
||||
|
||||
self.value = value
|
||||
self.arch = arch
|
||||
self.bitness = bitness
|
||||
self.description = description
|
||||
|
||||
def __hash__(self):
|
||||
return hash((self.name, self.value, self.arch))
|
||||
return hash((self.name, self.value, self.bitness))
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.name == other.name and self.value == other.value and self.arch == other.arch
|
||||
return self.name == other.name and self.value == other.value and self.bitness == other.bitness
|
||||
|
||||
def get_value_str(self) -> str:
|
||||
"""
|
||||
@@ -105,8 +100,8 @@ class Feature:
|
||||
return capa.engine.Result(self in ctx, self, [], locations=ctx.get(self, []))
|
||||
|
||||
def freeze_serialize(self):
|
||||
if self.arch is not None:
|
||||
return (self.__class__.__name__, [self.value, {"arch": self.arch}])
|
||||
if self.bitness is not None:
|
||||
return (self.__class__.__name__, [self.value, {"bitness": self.bitness}])
|
||||
else:
|
||||
return (self.__class__.__name__, [self.value])
|
||||
|
||||
@@ -131,6 +126,7 @@ class MatchedRule(Feature):
|
||||
|
||||
class Characteristic(Feature):
|
||||
def __init__(self, value: str, description=None):
|
||||
|
||||
super(Characteristic, self).__init__(value, description=description)
|
||||
|
||||
|
||||
@@ -139,6 +135,77 @@ class String(Feature):
|
||||
super(String, self).__init__(value, description=description)
|
||||
|
||||
|
||||
class Substring(String):
|
||||
def __init__(self, value: str, description=None):
|
||||
super(Substring, self).__init__(value, description=description)
|
||||
self.value = value
|
||||
|
||||
def evaluate(self, ctx):
|
||||
# mapping from string value to list of locations.
|
||||
# will unique the locations later on.
|
||||
matches = collections.defaultdict(list)
|
||||
|
||||
for feature, locations in ctx.items():
|
||||
if not isinstance(feature, (String,)):
|
||||
continue
|
||||
|
||||
if not isinstance(feature.value, str):
|
||||
# this is a programming error: String should only contain str
|
||||
raise ValueError("unexpected feature value type")
|
||||
|
||||
if self.value in feature.value:
|
||||
matches[feature.value].extend(locations)
|
||||
|
||||
if matches:
|
||||
# finalize: defaultdict -> dict
|
||||
# which makes json serialization easier
|
||||
matches = dict(matches)
|
||||
|
||||
# collect all locations
|
||||
locations = set()
|
||||
for s in matches.keys():
|
||||
matches[s] = list(set(matches[s]))
|
||||
locations.update(matches[s])
|
||||
|
||||
# unlike other features, we cannot put a reference to `self` directly in a `Result`.
|
||||
# this is because `self` may match on many strings, so we can't stuff the matched value into it.
|
||||
# instead, return a new instance that has a reference to both the substring and the matched values.
|
||||
return capa.engine.Result(True, _MatchedSubstring(self, matches), [], locations=locations)
|
||||
else:
|
||||
return capa.engine.Result(False, _MatchedSubstring(self, None), [])
|
||||
|
||||
def __str__(self):
|
||||
return "substring(%s)" % self.value
|
||||
|
||||
|
||||
class _MatchedSubstring(Substring):
|
||||
"""
|
||||
this represents specific match instances of a substring feature.
|
||||
treat it the same as a `Substring` except it has the `matches` field that contains the complete strings that matched.
|
||||
|
||||
note: this type should only ever be constructed by `Substring.evaluate()`. it is not part of the public API.
|
||||
"""
|
||||
|
||||
def __init__(self, substring: Substring, matches):
|
||||
"""
|
||||
args:
|
||||
substring (Substring): the substring feature that matches.
|
||||
matches (Dict[str, List[int]]|None): mapping from matching string to its locations.
|
||||
"""
|
||||
super(_MatchedSubstring, self).__init__(str(substring.value), description=substring.description)
|
||||
# we want this to collide with the name of `Substring` above,
|
||||
# so that it works nicely with the renderers.
|
||||
self.name = "substring"
|
||||
# this may be None if the substring doesn't match
|
||||
self.matches = matches
|
||||
|
||||
def __str__(self):
|
||||
return 'substring("%s", matches = %s)' % (
|
||||
self.value,
|
||||
", ".join(map(lambda s: '"' + s + '"', (self.matches or {}).keys())),
|
||||
)
|
||||
|
||||
|
||||
class Regex(String):
|
||||
def __init__(self, value: str, description=None):
|
||||
super(Regex, self).__init__(value, description=description)
|
||||
@@ -260,3 +327,54 @@ class Bytes(Feature):
|
||||
@classmethod
|
||||
def freeze_deserialize(cls, args):
|
||||
return cls(*[codecs.decode(x, "hex") for x in args])
|
||||
|
||||
|
||||
# identifiers for supported bitness names that tweak a feature
|
||||
# for example, offset/x32
|
||||
BITNESS_X32 = "x32"
|
||||
BITNESS_X64 = "x64"
|
||||
VALID_BITNESS = (BITNESS_X32, BITNESS_X64)
|
||||
|
||||
|
||||
# other candidates here: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types
|
||||
ARCH_I386 = "i386"
|
||||
ARCH_AMD64 = "amd64"
|
||||
VALID_ARCH = (ARCH_I386, ARCH_AMD64)
|
||||
|
||||
|
||||
class Arch(Feature):
|
||||
def __init__(self, value: str, description=None):
|
||||
super(Arch, self).__init__(value, description=description)
|
||||
self.name = "arch"
|
||||
|
||||
|
||||
OS_WINDOWS = "windows"
|
||||
OS_LINUX = "linux"
|
||||
OS_MACOS = "macos"
|
||||
VALID_OS = {os.value for os in capa.features.extractors.elf.OS}
|
||||
VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS})
|
||||
|
||||
|
||||
class OS(Feature):
|
||||
def __init__(self, value: str, description=None):
|
||||
super(OS, self).__init__(value, description=description)
|
||||
self.name = "os"
|
||||
|
||||
|
||||
FORMAT_PE = "pe"
|
||||
FORMAT_ELF = "elf"
|
||||
VALID_FORMAT = (FORMAT_PE, FORMAT_ELF)
|
||||
|
||||
|
||||
class Format(Feature):
|
||||
def __init__(self, value: str, description=None):
|
||||
super(Format, self).__init__(value, description=description)
|
||||
self.name = "format"
|
||||
|
||||
|
||||
def is_global_feature(feature):
|
||||
"""
|
||||
is this a feature that is extracted at every scope?
|
||||
today, these are OS and arch features.
|
||||
"""
|
||||
return isinstance(feature, (OS, Arch))
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
import abc
|
||||
from typing import Tuple, Iterator, SupportsInt
|
||||
|
||||
from capa.features.basicblock import Feature
|
||||
from capa.features.common import Feature
|
||||
|
||||
# feature extractors may reference functions, BBs, insns by opaque handle values.
|
||||
# the only requirement of these handles are that they support `__int__`,
|
||||
@@ -57,7 +57,23 @@ class FeatureExtractor:
|
||||
"""
|
||||
fetch the preferred load address at which the sample was analyzed.
|
||||
"""
|
||||
raise NotImplemented
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_global_features(self) -> Iterator[Tuple[Feature, int]]:
|
||||
"""
|
||||
extract features found at every scope ("global").
|
||||
|
||||
example::
|
||||
|
||||
extractor = VivisectFeatureExtractor(vw, path)
|
||||
for feature, va in extractor.extract_global_features():
|
||||
print('0x%x: %s' % (va, feature))
|
||||
|
||||
yields:
|
||||
Tuple[Feature, int]: feature and its location
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_file_features(self) -> Iterator[Tuple[Feature, int]]:
|
||||
@@ -73,7 +89,7 @@ class FeatureExtractor:
|
||||
yields:
|
||||
Tuple[Feature, int]: feature and its location
|
||||
"""
|
||||
raise NotImplemented
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_functions(self) -> Iterator[FunctionHandle]:
|
||||
@@ -81,7 +97,7 @@ class FeatureExtractor:
|
||||
enumerate the functions and provide opaque values that will
|
||||
subsequently be provided to `.extract_function_features()`, etc.
|
||||
"""
|
||||
raise NotImplemented
|
||||
raise NotImplementedError()
|
||||
|
||||
def is_library_function(self, va: int) -> bool:
|
||||
"""
|
||||
@@ -137,7 +153,7 @@ class FeatureExtractor:
|
||||
yields:
|
||||
Tuple[Feature, int]: feature and its location
|
||||
"""
|
||||
raise NotImplemented
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_basic_blocks(self, f: FunctionHandle) -> Iterator[BBHandle]:
|
||||
@@ -145,7 +161,7 @@ class FeatureExtractor:
|
||||
enumerate the basic blocks in the given function and provide opaque values that will
|
||||
subsequently be provided to `.extract_basic_block_features()`, etc.
|
||||
"""
|
||||
raise NotImplemented
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, int]]:
|
||||
@@ -168,7 +184,7 @@ class FeatureExtractor:
|
||||
yields:
|
||||
Tuple[Feature, int]: feature and its location
|
||||
"""
|
||||
raise NotImplemented
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_instructions(self, f: FunctionHandle, bb: BBHandle) -> Iterator[InsnHandle]:
|
||||
@@ -176,7 +192,7 @@ class FeatureExtractor:
|
||||
enumerate the instructions in the given basic block and provide opaque values that will
|
||||
subsequently be provided to `.extract_insn_features()`, etc.
|
||||
"""
|
||||
raise NotImplemented
|
||||
raise NotImplementedError()
|
||||
|
||||
@abc.abstractmethod
|
||||
def extract_insn_features(self, f: FunctionHandle, bb: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, int]]:
|
||||
@@ -201,7 +217,7 @@ class FeatureExtractor:
|
||||
yields:
|
||||
Tuple[Feature, int]: feature and its location
|
||||
"""
|
||||
raise NotImplemented
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
class NullFeatureExtractor(FeatureExtractor):
|
||||
@@ -216,6 +232,10 @@ class NullFeatureExtractor(FeatureExtractor):
|
||||
|
||||
extractor = NullFeatureExtractor({
|
||||
'base address': 0x401000,
|
||||
'global features': [
|
||||
(0x0, capa.features.Arch('i386')),
|
||||
(0x0, capa.features.OS('linux')),
|
||||
],
|
||||
'file features': [
|
||||
(0x402345, capa.features.Characteristic('embedded pe')),
|
||||
],
|
||||
@@ -253,6 +273,11 @@ class NullFeatureExtractor(FeatureExtractor):
|
||||
def get_base_address(self):
|
||||
return self.features["base address"]
|
||||
|
||||
def extract_global_features(self):
    """yield each configured global feature as a `(feature, va)` pair."""
    # entries are stored as (va, feature); swap the order on the way out.
    for va, feature in self.features.get("global features", []):
        yield feature, va
|
||||
|
||||
def extract_file_features(self):
|
||||
for p in self.features.get("file features", []):
|
||||
va, feature = p
|
||||
|
||||
95
capa/features/extractors/common.py
Normal file
95
capa/features/extractors/common.py
Normal file
@@ -0,0 +1,95 @@
|
||||
import io
|
||||
import logging
|
||||
import binascii
|
||||
import contextlib
|
||||
|
||||
import pefile
|
||||
|
||||
import capa.features
|
||||
import capa.features.extractors.elf
|
||||
import capa.features.extractors.pefile
|
||||
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, Arch, Format, String
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_file_strings(buf, **kwargs):
    """
    extract ASCII and UTF-16 LE strings from file

    args:
      buf: the raw file contents to scan.
      **kwargs: accepted and ignored.

    yields:
      Tuple[String, int]: string feature and the offset reported by the string extractor.
    """
    # the module-level imports do not pull in the strings submodule,
    # so import it here rather than rely on some other module having loaded it.
    import capa.features.extractors.strings

    strings = capa.features.extractors.strings
    for extract in (strings.extract_ascii_strings, strings.extract_unicode_strings):
        for s in extract(buf):
            yield String(s.s), s.offset
|
||||
|
||||
|
||||
def extract_format(buf):
    """
    yield the `Format` feature that matches the magic bytes at the start of `buf`.

    nothing is yielded when the magic bytes are not recognized.
    """
    known_magics = (
        (b"MZ", FORMAT_PE),
        (b"\x7fELF", FORMAT_ELF),
    )
    for magic, format_ in known_magics:
        if buf.startswith(magic):
            yield Format(format_), 0x0
            return

    # we likely end up here when:
    #  1. handling a file format that is not supported yet (e.g. macho)
    #
    # for (1), the table above will need to be extended as the format is implemented.
    logger.debug("unsupported file format: %s", binascii.hexlify(buf[:4]).decode("ascii"))
|
||||
|
||||
|
||||
def extract_arch(buf):
    """
    yield the `Arch` feature for the sample in `buf`, when it can be determined.

    PE files are handed to the pefile-based extractor and ELF files to `detect_elf_arch`.
    nothing is yielded for other inputs, or when the detected ELF arch
    is not one of the valid arch values.
    """
    if buf.startswith(b"MZ"):
        pe = pefile.PE(data=buf)
        yield from capa.features.extractors.pefile.extract_file_arch(pe=pe)
        return

    if not buf.startswith(b"\x7fELF"):
        # we likely end up here:
        #  1. handling shellcode, or
        #  2. handling a new file format (e.g. macho)
        #
        # for (1) we can't do much - it's shellcode and all bets are off.
        # we could maybe accept a further CLI argument to specify the arch,
        # but i think this would be rarely used.
        # rules that rely on arch conditions will fail to match on shellcode.
        #
        # for (2), this logic will need to be updated as the format is implemented.
        logger.debug("unsupported file format: %s, will not guess Arch", binascii.hexlify(buf[:4]).decode("ascii"))
        return

    with contextlib.closing(io.BytesIO(buf)) as stream:
        detected = capa.features.extractors.elf.detect_elf_arch(stream)

    if detected in capa.features.common.VALID_ARCH:
        yield Arch(detected), 0x0
    else:
        logger.debug("unsupported arch: %s", detected)
|
||||
|
||||
|
||||
def extract_os(buf):
    """
    yield the `OS` feature for the sample in `buf`, when it can be determined.

    PE files are reported as Windows; for ELF files the OS is guessed by `detect_elf_os`.
    nothing is yielded for other inputs, or when the guessed OS
    is not one of the valid OS values.
    """
    if buf.startswith(b"MZ"):
        yield OS(OS_WINDOWS), 0x0
        return

    if not buf.startswith(b"\x7fELF"):
        # we likely end up here:
        #  1. handling shellcode, or
        #  2. handling a new file format (e.g. macho)
        #
        # for (1) we can't do much - it's shellcode and all bets are off.
        # we could maybe accept a further CLI argument to specify the OS,
        # but i think this would be rarely used.
        # rules that rely on OS conditions will fail to match on shellcode.
        #
        # for (2), this logic will need to be updated as the format is implemented.
        logger.debug("unsupported file format: %s, will not guess OS", binascii.hexlify(buf[:4]).decode("ascii"))
        return

    with contextlib.closing(io.BytesIO(buf)) as stream:
        detected = capa.features.extractors.elf.detect_elf_os(stream)

    if detected in capa.features.common.VALID_OS:
        yield OS(detected), 0x0
    else:
        logger.debug("unsupported os: %s", detected)
|
||||
276
capa/features/extractors/elf.py
Normal file
276
capa/features/extractors/elf.py
Normal file
@@ -0,0 +1,276 @@
|
||||
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import struct
|
||||
import logging
|
||||
from enum import Enum
|
||||
from typing import BinaryIO
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def align(v, alignment):
    """
    round `v` up to the next multiple of `alignment`.

    a value that already is a multiple of `alignment` is returned unchanged.
    """
    remainder = v % alignment
    return v if remainder == 0 else v + (alignment - remainder)
|
||||
|
||||
|
||||
class CorruptElfFile(ValueError):
    """raised when the ELF header fields read from a file are missing or invalid."""
|
||||
|
||||
|
||||
class OS(str, Enum):
    """
    operating systems that the ELF heuristics in this module can report.

    members are also `str` instances, so they compare equal to their plain string value.
    """

    HPUX = "hpux"
    NETBSD = "netbsd"
    LINUX = "linux"
    HURD = "hurd"
    _86OPEN = "86open"
    SOLARIS = "solaris"
    AIX = "aix"
    IRIX = "irix"
    FREEBSD = "freebsd"
    TRU64 = "tru64"
    MODESTO = "modesto"
    OPENBSD = "openbsd"
    OPENVMS = "openvms"
    NSK = "nsk"
    AROS = "aros"
    FENIXOS = "fenixos"
    CLOUD = "cloud"
    SYLLABLE = "syllable"
    NACL = "nacl"
|
||||
|
||||
|
||||
# program header types that the OS heuristics look at.
_PT_INTERP = 0x3
_PT_NOTE = 0x4

# EI_OSABI values,
# via pyelftools: https://github.com/eliben/pyelftools/blob/0664de05ed2db3d39041e2d51d19622a8ef4fb0f/elftools/elf/enums.py#L35-L58
# some candidates are commented out because they are not useful values,
# at least when guessing OSes
_OSABI = {
    # 0: "SYSV",  # too often used when OS is not SYSV
    1: OS.HPUX,
    2: OS.NETBSD,
    3: OS.LINUX,
    4: OS.HURD,
    5: OS._86OPEN,
    6: OS.SOLARIS,
    7: OS.AIX,
    8: OS.IRIX,
    9: OS.FREEBSD,
    10: OS.TRU64,
    11: OS.MODESTO,
    12: OS.OPENBSD,
    13: OS.OPENVMS,
    14: OS.NSK,
    15: OS.AROS,
    16: OS.FENIXOS,
    17: OS.CLOUD,
    # 53: "SORTFIX",  # i can't find any reference to this OS, i dont think it exists
    # 64: "ARM_AEABI",  # not an OS
    # 97: "ARM",  # not an OS
    # 255: "STANDALONE",  # not an OS
}

# ABI tags found in the descriptor of a GNU note,
# via readelf: https://github.com/bminor/binutils-gdb/blob/c0e94211e1ac05049a4ce7c192c9d14d1764eb3e/binutils/readelf.c#L19635-L19658
# and here: https://github.com/bminor/binutils-gdb/blob/34c54daa337da9fadf87d2706d6a590ae1f88f4d/include/elf/common.h#L933-L939
_GNU_ABI_TAG = {
    0: OS.LINUX,
    1: OS.HURD,
    2: OS.SOLARIS,
    3: OS.FREEBSD,
    4: OS.NETBSD,
    5: OS.SYLLABLE,
    6: OS.NACL,
}

# note owners whose name alone identifies the OS.
_NOTE_OWNERS = {
    "OpenBSD": OS.OPENBSD,
    "NetBSD": OS.NETBSD,
    "FreeBSD": OS.FREEBSD,
}


def _read_at(f, offset, size):
    """read exactly `size` bytes at `offset`; return None when the stream cannot supply them."""
    try:
        f.seek(offset)
        data = f.read(size)
    except (OverflowError, OSError):
        # offset and size come straight from header fields and may be nonsensical.
        return None
    return data if len(data) == size else None


def _parse_program_header(phent, endian, bitness):
    """return `(p_type, p_offset, p_filesz)` from a program header entry that is known to be long enough."""
    (p_type,) = struct.unpack_from(endian + "I", phent, 0x0)
    if bitness == 32:
        # the four fields after p_type; the middle two are unused here.
        p_offset, _, _, p_filesz = struct.unpack_from(endian + "IIII", phent, 0x4)
    else:
        # the four fields starting at 0x8; the middle two are unused here.
        p_offset, _, _, p_filesz = struct.unpack_from(endian + "QQQQ", phent, 0x8)
    return p_type, p_offset, p_filesz


def _guess_os_from_note(note, endian):
    """return the `OS` indicated by the first note in `note`, or None when it does not indicate one."""
    try:
        namesz, descsz, type_ = struct.unpack_from(endian + "III", note, 0x0)
    except struct.error:
        logger.debug("note is too short to contain a header")
        return None

    name_offset = 0xC
    desc_offset = name_offset + align(namesz, 0x4)
    logger.debug("namesz: 0x%02x descsz: 0x%02x type: 0x%04x", namesz, descsz, type_)

    # the name is only compared against ASCII constants,
    # so undecodable bytes are replaced rather than treated as fatal.
    name = note[name_offset : name_offset + namesz].partition(b"\x00")[0].decode("ascii", errors="replace")
    logger.debug("name: %s", name)

    if type_ != 1:
        return None

    if name == "GNU":
        if descsz < 16:
            return None

        desc = note[desc_offset : desc_offset + descsz]
        try:
            abi_tag, kmajor, kminor, kpatch = struct.unpack_from(endian + "IIII", desc, 0x0)
        except struct.error:
            # descsz claims more data than the segment actually holds.
            logger.debug("GNU note descriptor is truncated")
            return None
        logger.debug("GNU_ABI_TAG: 0x%02x", abi_tag)

        tag_os = _GNU_ABI_TAG.get(abi_tag)
        if tag_os is not None:
            logger.debug("abi tag: %s earliest compatible kernel: %d.%d.%d", tag_os, kmajor, kminor, kpatch)
        return tag_os

    owner_os = _NOTE_OWNERS.get(name)
    if owner_os is not None:
        logger.debug("note owner: %s", owner_os.name)
    return owner_os


def detect_elf_os(f: BinaryIO) -> str:
    """
    guess the operating system that an ELF file targets.

    three heuristics are consulted, in order of preference:
      1. the EI_OSABI byte of the file header,
      2. the first note of each PT_NOTE segment,
      3. the dynamic linker path of each PT_INTERP segment.

    the first heuristic that produces an answer wins.
    rather than short circuiting, the remaining heuristics still run,
    so that their findings show up in the debug log.

    args:
      f: seekable binary stream containing the ELF file.

    returns:
      str: the value of the detected `OS` member, or "unknown".

    raises:
      CorruptElfFile: when the magic header is missing, the file header is truncated,
        or EI_CLASS/EI_DATA hold invalid values.
    """
    f.seek(0x0)
    file_header = f.read(0x40)

    if not file_header.startswith(b"\x7fELF"):
        raise CorruptElfFile("missing magic header")

    try:
        ei_class, ei_data = struct.unpack_from("BB", file_header, 4)
    except struct.error as e:
        raise CorruptElfFile("truncated file header") from e
    logger.debug("ei_class: 0x%02x ei_data: 0x%02x", ei_class, ei_data)

    if ei_class == 1:
        bitness = 32
    elif ei_class == 2:
        bitness = 64
    else:
        raise CorruptElfFile("invalid ei_class: 0x%02x" % ei_class)

    if ei_data == 1:
        endian = "<"
    elif ei_data == 2:
        endian = ">"
    else:
        raise CorruptElfFile("not an ELF file: invalid ei_data: 0x%02x" % ei_data)

    try:
        (ei_osabi,) = struct.unpack_from("B", file_header, 7)
        if bitness == 32:
            (e_phoff,) = struct.unpack_from(endian + "I", file_header, 0x1C)
            e_phentsize, e_phnum = struct.unpack_from(endian + "HH", file_header, 0x2A)
        else:
            (e_phoff,) = struct.unpack_from(endian + "Q", file_header, 0x20)
            e_phentsize, e_phnum = struct.unpack_from(endian + "HH", file_header, 0x36)
    except struct.error as e:
        raise CorruptElfFile("truncated file header") from e

    logger.debug("e_phoff: 0x%02x e_phentsize: 0x%02x e_phnum: %d", e_phoff, e_phentsize, e_phnum)
    logger.debug("ei_osabi: 0x%02x (%s)", ei_osabi, _OSABI.get(ei_osabi, "unknown"))

    # ei_osabi == 0 is commonly set even when the OS is not SYSV,
    # and other values are unused or unknown; neither is present in the table.
    # this is the preferred strategy: subsequent ones only fill in a missing answer.
    ret = _OSABI.get(ei_osabi)

    # smallest entry that still contains p_type, p_offset, and p_filesz.
    min_phentsize = 0x14 if bitness == 32 else 0x28
    if e_phnum and e_phentsize < min_phentsize:
        logger.warning("program header entries are too small: 0x%02x", e_phentsize)
        e_phnum = 0

    program_headers = _read_at(f, e_phoff, e_phnum * e_phentsize)
    if program_headers is None:
        logger.warning("failed to read program headers")
        program_headers = b""
        e_phnum = 0

    segments = []
    for i in range(e_phnum):
        start = i * e_phentsize
        segment = _parse_program_header(program_headers[start : start + e_phentsize], endian, bitness)
        logger.debug("p_type: 0x%04x", segment[0])
        segments.append(segment)

    # search for PT_NOTE sections that specify an OS
    # for example, on Linux there is a GNU section with minimum kernel version
    for p_type, p_offset, p_filesz in segments:
        if p_type != _PT_NOTE:
            continue

        logger.debug("p_offset: 0x%02x p_filesz: 0x%04x", p_offset, p_filesz)
        note = _read_at(f, p_offset, p_filesz)
        if note is None:
            logger.warning("failed to read note content")
            continue

        guess = _guess_os_from_note(note, endian)
        # update only if not set
        # so we can get the debugging output of subsequent strategies
        if ret is None:
            ret = guess

    # search for recognizable dynamic linkers (interpreters)
    # for example, on linux, we see file paths like: /lib64/ld-linux-x86-64.so.2
    for p_type, p_offset, p_filesz in segments:
        if p_type != _PT_INTERP:
            continue

        interp = _read_at(f, p_offset, p_filesz)
        if interp is None:
            logger.warning("failed to read interp content")
            continue

        linker = interp.partition(b"\x00")[0].decode("ascii", errors="replace")
        logger.debug("linker: %s", linker)
        if "ld-linux" in linker and ret is None:
            ret = OS.LINUX

    return ret.value if ret is not None else "unknown"
|
||||
|
||||
|
||||
class Arch(str, Enum):
    """
    architectures that `detect_elf_arch` can report.

    members are also `str` instances, so they compare equal to their plain string value.
    """

    I386 = "i386"
    AMD64 = "amd64"
|
||||
|
||||
|
||||
def detect_elf_arch(f: BinaryIO) -> str:
    """
    determine the architecture of an ELF file from the machine field of its file header.

    args:
      f: seekable binary stream containing the ELF file.

    returns:
      str: the matching `Arch` member, or "unknown" when the machine type is not supported.

    raises:
      CorruptElfFile: when the magic header is missing, the file header is truncated,
        or EI_DATA holds an invalid value.
    """
    f.seek(0x0)
    file_header = f.read(0x40)

    if not file_header.startswith(b"\x7fELF"):
        raise CorruptElfFile("missing magic header")

    try:
        (ei_data,) = struct.unpack_from("B", file_header, 5)
    except struct.error as e:
        raise CorruptElfFile("truncated file header") from e
    logger.debug("ei_data: 0x%02x", ei_data)

    if ei_data == 1:
        endian = "<"
    elif ei_data == 2:
        endian = ">"
    else:
        raise CorruptElfFile("not an ELF file: invalid ei_data: 0x%02x" % ei_data)

    try:
        # the machine type is a 16-bit field at offset 0x12, stored in the file's byte order.
        (e_machine,) = struct.unpack_from(endian + "H", file_header, 0x12)
    except struct.error as e:
        raise CorruptElfFile("truncated file header") from e
    logger.debug("ei_machine: 0x%02x", e_machine)

    EM_386 = 0x3
    EM_X86_64 = 0x3E
    supported = {
        EM_386: Arch.I386,
        EM_X86_64: Arch.AMD64,
    }
    # other machine types are not really unknown, but unsupported at the moment:
    # https://github.com/eliben/pyelftools/blob/ab444d982d1849191e910299a985989857466620/elftools/elf/enums.py#L73
    return supported.get(e_machine, "unknown")
|
||||
159
capa/features/extractors/elffile.py
Normal file
159
capa/features/extractors/elffile.py
Normal file
@@ -0,0 +1,159 @@
|
||||
# Copyright (C) 2020 FireEye, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import io
|
||||
import logging
|
||||
import contextlib
|
||||
from typing import Tuple, Iterator
|
||||
|
||||
from elftools.elf.elffile import ELFFile, SymbolTableSection
|
||||
|
||||
import capa.features.extractors.common
|
||||
from capa.features.file import Import, Section
|
||||
from capa.features.common import OS, FORMAT_ELF, Arch, Format, Feature
|
||||
from capa.features.extractors.elf import Arch as ElfArch
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_file_import_names(elf, **kwargs):
    """
    yield an `Import` feature for each named function symbol found in the symbol tables.

    NOTE(review): every named STT_FUNC symbol is yielded; nothing here restricts
    the result to undefined (imported) symbols - confirm that this is intended.

    args:
      elf: parsed ELF file that provides `iter_sections()`.
      **kwargs: accepted and ignored.

    yields:
      Tuple[Import, int]: import feature; the location is always 0x0 for now.
    """
    # see https://github.com/eliben/pyelftools/blob/0664de05ed2db3d39041e2d51d19622a8ef4fb0f/scripts/readelf.py#L372
    symbol_tables = [s for s in elf.iter_sections() if isinstance(s, SymbolTableSection)]

    for section in symbol_tables:
        if section["sh_entsize"] == 0:
            logger.debug("Symbol table '%s' has a sh_entsize of zero!", section.name)
            continue

        logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_symbols())

        for symbol in section.iter_symbols():
            if symbol.name and symbol.entry.st_info.type == "STT_FUNC":
                # TODO symbol address
                # TODO symbol version info?
                yield Import(symbol.name), 0x0
|
||||
|
||||
|
||||
def extract_file_section_names(elf, **kwargs):
    """
    yield a `Section` feature, located at the section's `sh_addr`, for each section.

    unnamed sections are reported as "NULL" when they are null sections, and skipped otherwise.
    """
    for section in elf.iter_sections():
        name = section.name
        if not name:
            if not section.is_null():
                continue
            name = "NULL"
        yield Section(name), section.header.sh_addr
|
||||
|
||||
|
||||
def extract_file_strings(buf, **kwargs):
    """yield the string features that the common extractor finds in `buf`."""
    strings = capa.features.extractors.common.extract_file_strings(buf)
    yield from strings
|
||||
|
||||
|
||||
def extract_file_os(elf, buf, **kwargs):
    """
    yield the OS feature detected for `buf`, or an "unknown" OS feature when none is detected.
    """
    # our current approach does not always get an OS value, e.g. for packed samples
    # for file limitation purposes, we're more lax here
    detected = next(capa.features.extractors.common.extract_os(buf), None)
    if detected is None:
        yield OS("unknown"), 0x0
    else:
        yield detected
|
||||
|
||||
|
||||
def extract_file_format(**kwargs):
    """yield the ELF `Format` feature; all arguments are ignored."""
    elf_format = Format(FORMAT_ELF)
    yield elf_format, 0x0
|
||||
|
||||
|
||||
def extract_file_arch(elf, **kwargs):
    """
    yield the `Arch` feature for the machine architecture reported by `elf`.

    a warning is logged, and nothing is yielded, for architectures other than x86 and x64.
    """
    # TODO merge with capa.features.extractors.elf.detect_elf_arch()
    supported = {
        "x86": ElfArch.I386,
        "x64": ElfArch.AMD64,
    }
    arch = elf.get_machine_arch()
    if arch not in supported:
        logger.warning("unsupported architecture: %s", arch)
        return

    yield Arch(supported[arch]), 0x0
|
||||
|
||||
|
||||
def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, int]]:
    """run each of the file handlers over the sample and yield the features they produce."""
    for handler in FILE_HANDLERS:
        # each handler picks the keyword arguments that it needs.
        yield from handler(elf=elf, buf=buf)  # type: ignore
|
||||
|
||||
|
||||
# handlers invoked, in this order, by `extract_file_features`.
# each one is called with the keyword arguments `elf` and `buf`.
FILE_HANDLERS = (
    # TODO extract_file_export_names,
    extract_file_import_names,
    extract_file_section_names,
    extract_file_strings,
    # no library matching
    extract_file_format,
)
|
||||
|
||||
|
||||
def extract_global_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, int]]:
    """run each of the global handlers over the sample and yield the features they produce."""
    for handler in GLOBAL_HANDLERS:
        # each handler picks the keyword arguments that it needs.
        yield from handler(elf=elf, buf=buf)  # type: ignore
|
||||
|
||||
|
||||
# handlers invoked, in this order, by `extract_global_features`.
# each one is called with the keyword arguments `elf` and `buf`.
GLOBAL_HANDLERS = (
    extract_file_os,
    extract_file_arch,
)
|
||||
|
||||
|
||||
class ElfFeatureExtractor(FeatureExtractor):
    """
    feature extractor for ELF files that only supports the global and file scopes.

    the function, basic block, and instruction scope methods raise NotImplementedError.
    """

    # message shared by every method of a scope that this extractor does not support.
    _FILE_ONLY_MESSAGE = "ElfFeatureExtractor can only be used to extract file features"

    def __init__(self, path: str):
        """
        args:
          path: path of the ELF file to analyze; it is read once, here.
        """
        super().__init__()
        self.path = path
        with open(self.path, "rb") as f:
            # keep the raw bytes: the parser and the buffer-based handlers work on the same data,
            # so there is no need to go back to disk for every extraction.
            self._buf = f.read()
        self.elf = ELFFile(io.BytesIO(self._buf))

    def get_base_address(self):
        """return the virtual address of the first PT_LOAD segment, or None when there is none."""
        for segment in self.elf.iter_segments():
            if segment.header.p_type == "PT_LOAD":
                return segment.header.p_vaddr
        return None

    def extract_global_features(self):
        """yield the global (OS and arch) features of the file."""
        yield from extract_global_features(self.elf, self._buf)

    def extract_file_features(self):
        """yield the file scope features of the file."""
        yield from extract_file_features(self.elf, self._buf)

    def get_functions(self):
        raise NotImplementedError(self._FILE_ONLY_MESSAGE)

    def extract_function_features(self, f):
        raise NotImplementedError(self._FILE_ONLY_MESSAGE)

    def get_basic_blocks(self, f):
        raise NotImplementedError(self._FILE_ONLY_MESSAGE)

    def extract_basic_block_features(self, f, bb):
        raise NotImplementedError(self._FILE_ONLY_MESSAGE)

    def get_instructions(self, f, bb):
        raise NotImplementedError(self._FILE_ONLY_MESSAGE)

    def extract_insn_features(self, f, bb, insn):
        raise NotImplementedError(self._FILE_ONLY_MESSAGE)

    def is_library_function(self, va):
        raise NotImplementedError(self._FILE_ONLY_MESSAGE)

    def get_function_name(self, va):
        raise NotImplementedError(self._FILE_ONLY_MESSAGE)
|
||||
@@ -5,11 +5,13 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import idaapi
|
||||
|
||||
import capa.ida.helpers
|
||||
import capa.features.extractors.elf
|
||||
import capa.features.extractors.ida.file
|
||||
import capa.features.extractors.ida.insn
|
||||
import capa.features.extractors.ida.global_
|
||||
import capa.features.extractors.ida.function
|
||||
import capa.features.extractors.ida.basicblock
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
@@ -57,13 +59,18 @@ class InstructionHandle:
|
||||
class IdaFeatureExtractor(FeatureExtractor):
|
||||
def __init__(self):
|
||||
super(IdaFeatureExtractor, self).__init__()
|
||||
self.global_features = []
|
||||
self.global_features.extend(capa.features.extractors.ida.global_.extract_os())
|
||||
self.global_features.extend(capa.features.extractors.ida.global_.extract_arch())
|
||||
|
||||
def get_base_address(self):
|
||||
return idaapi.get_imagebase()
|
||||
|
||||
def extract_global_features(self):
|
||||
yield from self.global_features
|
||||
|
||||
def extract_file_features(self):
|
||||
for (feature, ea) in capa.features.extractors.ida.file.extract_features():
|
||||
yield feature, ea
|
||||
yield from capa.features.extractors.ida.file.extract_features()
|
||||
|
||||
def get_functions(self):
|
||||
import capa.features.extractors.ida.helpers as ida_helpers
|
||||
@@ -84,8 +91,7 @@ class IdaFeatureExtractor(FeatureExtractor):
|
||||
return FunctionHandle(f)
|
||||
|
||||
def extract_function_features(self, f):
|
||||
for (feature, ea) in capa.features.extractors.ida.function.extract_features(f):
|
||||
yield feature, ea
|
||||
yield from capa.features.extractors.ida.function.extract_features(f)
|
||||
|
||||
def get_basic_blocks(self, f):
|
||||
import capa.features.extractors.ida.helpers as ida_helpers
|
||||
@@ -94,8 +100,7 @@ class IdaFeatureExtractor(FeatureExtractor):
|
||||
yield BasicBlockHandle(bb)
|
||||
|
||||
def extract_basic_block_features(self, f, bb):
|
||||
for (feature, ea) in capa.features.extractors.ida.basicblock.extract_features(f, bb):
|
||||
yield feature, ea
|
||||
yield from capa.features.extractors.ida.basicblock.extract_features(f, bb)
|
||||
|
||||
def get_instructions(self, f, bb):
|
||||
import capa.features.extractors.ida.helpers as ida_helpers
|
||||
@@ -104,5 +109,4 @@ class IdaFeatureExtractor(FeatureExtractor):
|
||||
yield InstructionHandle(insn)
|
||||
|
||||
def extract_insn_features(self, f, bb, insn):
|
||||
for (feature, ea) in capa.features.extractors.ida.insn.extract_features(f, bb, insn):
|
||||
yield feature, ea
|
||||
yield from capa.features.extractors.ida.insn.extract_features(f, bb, insn)
|
||||
|
||||
@@ -11,12 +11,13 @@ import struct
|
||||
import idc
|
||||
import idaapi
|
||||
import idautils
|
||||
import ida_loader
|
||||
|
||||
import capa.features.extractors.helpers
|
||||
import capa.features.extractors.strings
|
||||
import capa.features.extractors.ida.helpers
|
||||
from capa.features.file import Export, Import, Section, FunctionName
|
||||
from capa.features.common import String, Characteristic
|
||||
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, Format, String, Characteristic
|
||||
|
||||
|
||||
def check_segment_for_pe(seg):
|
||||
@@ -153,6 +154,19 @@ def extract_file_function_names():
|
||||
yield FunctionName(name), ea
|
||||
|
||||
|
||||
def extract_file_format():
    """
    yield the `Format` feature for the file type name that IDA reports for the loaded file.

    yields:
      Tuple[Format, int]: format feature, located at 0x0.

    raises:
      NotImplementedError: when the file type name is neither PE nor ELF32/ELF64.
    """
    format_name = ida_loader.get_file_type_name()

    if "PE" in format_name:
        yield Format(FORMAT_PE), 0x0
    elif "ELF64" in format_name or "ELF32" in format_name:
        yield Format(FORMAT_ELF), 0x0
    else:
        # interpolate here: unlike logging calls, exception constructors
        # do not apply %-style arguments to the message.
        raise NotImplementedError("file format: %s" % format_name)
|
||||
|
||||
|
||||
def extract_features():
|
||||
"""extract file features"""
|
||||
for file_handler in FILE_HANDLERS:
|
||||
@@ -167,6 +181,7 @@ FILE_HANDLERS = (
|
||||
extract_file_section_names,
|
||||
extract_file_embedded_pe,
|
||||
extract_file_function_names,
|
||||
extract_file_format,
|
||||
)
|
||||
|
||||
|
||||
|
||||
56
capa/features/extractors/ida/global_.py
Normal file
56
capa/features/extractors/ida/global_.py
Normal file
@@ -0,0 +1,56 @@
|
||||
import logging
|
||||
import contextlib
|
||||
|
||||
import idaapi
|
||||
import ida_loader
|
||||
|
||||
import capa.ida.helpers
|
||||
import capa.features.extractors.elf
|
||||
from capa.features.common import OS, ARCH_I386, ARCH_AMD64, OS_WINDOWS, Arch
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_os():
    """
    yield the `OS` feature for the file that is loaded in IDA, when it can be determined.

    PE files are reported as Windows; for ELF files the OS is guessed by `detect_elf_os`.
    nothing is yielded for other file types.
    """
    format_name = ida_loader.get_file_type_name()

    if "PE" in format_name:
        yield OS(OS_WINDOWS), 0x0
        return

    if "ELF" not in format_name:
        # we likely end up here:
        #  1. handling shellcode, or
        #  2. handling a new file format (e.g. macho)
        #
        # for (1) we can't do much - it's shellcode and all bets are off.
        # we could maybe accept a further CLI argument to specify the OS,
        # but i think this would be rarely used.
        # rules that rely on OS conditions will fail to match on shellcode.
        #
        # for (2), this logic will need to be updated as the format is implemented.
        logger.debug("unsupported file format: %s, will not guess OS", format_name)
        return

    with contextlib.closing(capa.ida.helpers.IDAIO()) as stream:
        detected = capa.features.extractors.elf.detect_elf_os(stream)

    yield OS(detected), 0x0
|
||||
|
||||
|
||||
def extract_arch():
    """
    yield the `Arch` feature for the file that is loaded in IDA, when it is supported.

    only the "metapc" processor in 32-bit or 64-bit mode yields a feature;
    anything else is logged at debug level and yields nothing.
    """
    info = idaapi.get_inf_structure()

    if info.procName != "metapc":
        # we likely end up here:
        #  1. handling a new architecture (e.g. aarch64)
        #
        # for (1), this logic will need to be updated as the format is implemented.
        logger.debug("unsupported architecture: %s", info.procName)
        return

    if info.is_64bit():
        yield Arch(ARCH_AMD64), 0x0
    elif info.is_32bit():
        yield Arch(ARCH_I386), 0x0
    else:
        logger.debug("unsupported architecture: non-32-bit nor non-64-bit intel")
|
||||
@@ -14,8 +14,8 @@ import capa.features.extractors.helpers
|
||||
import capa.features.extractors.ida.helpers
|
||||
from capa.features.insn import API, Number, Offset, Mnemonic
|
||||
from capa.features.common import (
|
||||
ARCH_X32,
|
||||
ARCH_X64,
|
||||
BITNESS_X32,
|
||||
BITNESS_X64,
|
||||
MAX_BYTES_FEATURE_SIZE,
|
||||
THUNK_CHAIN_DEPTH_DELTA,
|
||||
Bytes,
|
||||
@@ -28,22 +28,22 @@ from capa.features.common import (
|
||||
SECURITY_COOKIE_BYTES_DELTA = 0x40
|
||||
|
||||
|
||||
def get_arch(ctx):
|
||||
def get_bitness(ctx):
|
||||
"""
|
||||
fetch the ARCH_* constant for the currently open workspace.
|
||||
fetch the BITNESS_* constant for the currently open workspace.
|
||||
|
||||
via Tamir Bahar/@tmr232
|
||||
https://reverseengineering.stackexchange.com/a/11398/17194
|
||||
"""
|
||||
if "arch" not in ctx:
|
||||
if "bitness" not in ctx:
|
||||
info = idaapi.get_inf_structure()
|
||||
if info.is_64bit():
|
||||
ctx["arch"] = ARCH_X64
|
||||
ctx["bitness"] = BITNESS_X64
|
||||
elif info.is_32bit():
|
||||
ctx["arch"] = ARCH_X32
|
||||
ctx["bitness"] = BITNESS_X32
|
||||
else:
|
||||
raise ValueError("unexpected architecture")
|
||||
return ctx["arch"]
|
||||
raise ValueError("unexpected bitness")
|
||||
return ctx["bitness"]
|
||||
|
||||
|
||||
def get_imports(ctx):
|
||||
@@ -149,7 +149,7 @@ def extract_insn_number_features(f, bb, insn):
|
||||
const = op.addr
|
||||
|
||||
yield Number(const), insn.ea
|
||||
yield Number(const, arch=get_arch(f.ctx)), insn.ea
|
||||
yield Number(const, bitness=get_bitness(f.ctx)), insn.ea
|
||||
|
||||
|
||||
def extract_insn_bytes_features(f, bb, insn):
|
||||
@@ -218,7 +218,7 @@ def extract_insn_offset_features(f, bb, insn):
|
||||
op_off = capa.features.extractors.helpers.twos_complement(op_off, 32)
|
||||
|
||||
yield Offset(op_off), insn.ea
|
||||
yield Offset(op_off, arch=get_arch(f.ctx)), insn.ea
|
||||
yield Offset(op_off, bitness=get_bitness(f.ctx)), insn.ea
|
||||
|
||||
|
||||
def contains_stack_cookie_keywords(s):
|
||||
@@ -337,7 +337,7 @@ def extract_insn_mnemonic_features(f, bb, insn):
|
||||
bb (IDA BasicBlock)
|
||||
insn (IDA insn_t)
|
||||
"""
|
||||
yield Mnemonic(insn.get_canon_mnem()), insn.ea
|
||||
yield Mnemonic(idc.print_insn_mnem(insn.ea)), insn.ea
|
||||
|
||||
|
||||
def extract_insn_peb_access_characteristic_features(f, bb, insn):
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
from networkx import nx
|
||||
import networkx
|
||||
from networkx.algorithms.components import strongly_connected_components
|
||||
|
||||
|
||||
@@ -20,6 +20,6 @@ def has_loop(edges, threshold=2):
|
||||
returns:
|
||||
bool
|
||||
"""
|
||||
g = nx.DiGraph()
|
||||
g = networkx.DiGraph()
|
||||
g.add_edges_from(edges)
|
||||
return any(len(comp) >= threshold for comp in strongly_connected_components(g))
|
||||
|
||||
@@ -5,33 +5,35 @@
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import logging
|
||||
|
||||
import pefile
|
||||
|
||||
import capa.features.common
|
||||
import capa.features.extractors
|
||||
import capa.features.extractors.common
|
||||
import capa.features.extractors.helpers
|
||||
import capa.features.extractors.strings
|
||||
from capa.features.file import Export, Import, Section
|
||||
from capa.features.common import String, Characteristic
|
||||
from capa.features.common import OS, ARCH_I386, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, Characteristic
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_file_embedded_pe(pe, file_path):
|
||||
with open(file_path, "rb") as f:
|
||||
fbytes = f.read()
|
||||
|
||||
for offset, i in capa.features.extractors.helpers.carve_pe(fbytes, 1):
|
||||
def extract_file_embedded_pe(buf, **kwargs):
|
||||
for offset, _ in capa.features.extractors.helpers.carve_pe(buf, 1):
|
||||
yield Characteristic("embedded pe"), offset
|
||||
|
||||
|
||||
def extract_file_export_names(pe, file_path):
|
||||
def extract_file_export_names(pe, **kwargs):
|
||||
base_address = pe.OPTIONAL_HEADER.ImageBase
|
||||
|
||||
if hasattr(pe, "DIRECTORY_ENTRY_EXPORT"):
|
||||
for export in pe.DIRECTORY_ENTRY_EXPORT.symbols:
|
||||
if not export.name:
|
||||
continue
|
||||
try:
|
||||
name = export.name.partition(b"\x00")[0].decode("ascii")
|
||||
except UnicodeDecodeError:
|
||||
@@ -40,7 +42,7 @@ def extract_file_export_names(pe, file_path):
|
||||
yield Export(name), va
|
||||
|
||||
|
||||
def extract_file_import_names(pe, file_path):
|
||||
def extract_file_import_names(pe, **kwargs):
|
||||
"""
|
||||
extract imported function names
|
||||
1. imports by ordinal:
|
||||
@@ -72,7 +74,7 @@ def extract_file_import_names(pe, file_path):
|
||||
yield Import(name), imp.address
|
||||
|
||||
|
||||
def extract_file_section_names(pe, file_path):
|
||||
def extract_file_section_names(pe, **kwargs):
|
||||
base_address = pe.OPTIONAL_HEADER.ImageBase
|
||||
|
||||
for section in pe.sections:
|
||||
@@ -84,21 +86,11 @@ def extract_file_section_names(pe, file_path):
|
||||
yield Section(name), base_address + section.VirtualAddress
|
||||
|
||||
|
||||
def extract_file_strings(pe, file_path):
|
||||
"""
|
||||
extract ASCII and UTF-16 LE strings from file
|
||||
"""
|
||||
with open(file_path, "rb") as f:
|
||||
b = f.read()
|
||||
|
||||
for s in capa.features.extractors.strings.extract_ascii_strings(b):
|
||||
yield String(s.s), s.offset
|
||||
|
||||
for s in capa.features.extractors.strings.extract_unicode_strings(b):
|
||||
yield String(s.s), s.offset
|
||||
def extract_file_strings(buf, **kwargs):
|
||||
yield from capa.features.extractors.common.extract_file_strings(buf)
|
||||
|
||||
|
||||
def extract_file_function_names(pe, file_path):
|
||||
def extract_file_function_names(**kwargs):
|
||||
"""
|
||||
extract the names of statically-linked library functions.
|
||||
"""
|
||||
@@ -108,20 +100,39 @@ def extract_file_function_names(pe, file_path):
|
||||
return
|
||||
|
||||
|
||||
def extract_file_features(pe, file_path):
|
||||
def extract_file_os(**kwargs):
    """
    yield the operating system feature for a PE file.

    args:
      **kwargs: accepted and ignored, so the handler can be called with the
        same keyword arguments as the other handlers.

    yields:
      Tuple[Feature, VA]: the Windows OS feature, reported at offset 0x0.
    """
    # assuming PE -> Windows
    # though i suppose they're also used by UEFI
    windows = OS(OS_WINDOWS)
    yield windows, 0x0
|
||||
|
||||
|
||||
def extract_file_format(**kwargs):
    """
    yield the file format feature for a PE file.

    args:
      **kwargs: accepted and ignored, so the handler can be called with the
        same keyword arguments as the other handlers.

    yields:
      Tuple[Feature, VA]: the PE format feature, reported at offset 0x0.
    """
    pe_format = Format(FORMAT_PE)
    yield pe_format, 0x0
|
||||
|
||||
|
||||
def extract_file_arch(pe, **kwargs):
    """
    yield the CPU architecture feature for a PE file.

    the architecture is taken from the `Machine` field of the PE file header.
    only i386 and AMD64 are recognized; anything else is logged and yields nothing.

    args:
      pe (pefile.PE): the parsed PE
      **kwargs: accepted and ignored, so the handler can be called with the
        same keyword arguments as the other handlers.

    yields:
      Tuple[Feature, VA]: the architecture feature, reported at offset 0x0.
    """
    machine = pe.FILE_HEADER.Machine

    if machine == pefile.MACHINE_TYPE["IMAGE_FILE_MACHINE_I386"]:
        yield Arch(ARCH_I386), 0x0
    elif machine == pefile.MACHINE_TYPE["IMAGE_FILE_MACHINE_AMD64"]:
        yield Arch(ARCH_AMD64), 0x0
    else:
        # the header value may be one that pefile has no name for; indexing the
        # table directly would then raise KeyError from inside the logging call,
        # so fall back to the raw number.
        machine_name = pefile.MACHINE_TYPE.get(machine, "0x%x" % machine)
        logger.warning("unsupported architecture: %s", machine_name)
|
||||
|
||||
|
||||
def extract_file_features(pe, buf):
|
||||
"""
|
||||
extract file features from given workspace
|
||||
|
||||
args:
|
||||
pe (pefile.PE): the parsed PE
|
||||
file_path: path to the input file
|
||||
buf: the raw sample bytes
|
||||
|
||||
yields:
|
||||
Tuple[Feature, VA]: a feature and its location.
|
||||
"""
|
||||
|
||||
for file_handler in FILE_HANDLERS:
|
||||
for feature, va in file_handler(pe, file_path):
|
||||
for feature, va in file_handler(pe=pe, buf=buf):
|
||||
yield feature, va
|
||||
|
||||
|
||||
@@ -132,6 +143,29 @@ FILE_HANDLERS = (
|
||||
extract_file_section_names,
|
||||
extract_file_strings,
|
||||
extract_file_function_names,
|
||||
extract_file_format,
|
||||
)
|
||||
|
||||
|
||||
def extract_global_features(pe, buf):
    """
    extract global features from the given PE file.

    every handler listed in GLOBAL_HANDLERS is invoked, in order, with the
    keyword arguments `pe` and `buf`, and its results are relayed unchanged.

    args:
      pe (pefile.PE): the parsed PE
      buf: the raw sample bytes

    yields:
      Tuple[Feature, VA]: a feature and its location.
    """
    yield from (
        (feature, va)
        for handler in GLOBAL_HANDLERS
        for feature, va in handler(pe=pe, buf=buf)
    )
|
||||
|
||||
|
||||
GLOBAL_HANDLERS = (
|
||||
extract_file_os,
|
||||
extract_file_arch,
|
||||
)
|
||||
|
||||
|
||||
@@ -144,9 +178,17 @@ class PefileFeatureExtractor(FeatureExtractor):
|
||||
def get_base_address(self):
|
||||
return self.pe.OPTIONAL_HEADER.ImageBase
|
||||
|
||||
def extract_global_features(self):
|
||||
with open(self.path, "rb") as f:
|
||||
buf = f.read()
|
||||
|
||||
yield from extract_global_features(self.pe, buf)
|
||||
|
||||
def extract_file_features(self):
|
||||
for feature, va in extract_file_features(self.pe, self.path):
|
||||
yield feature, va
|
||||
with open(self.path, "rb") as f:
|
||||
buf = f.read()
|
||||
|
||||
yield from extract_file_features(self.pe, buf)
|
||||
|
||||
def get_functions(self):
|
||||
raise NotImplementedError("PefileFeatureExtract can only be used to extract file features")
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
from smda.common.SmdaReport import SmdaReport
|
||||
|
||||
import capa.features.extractors.common
|
||||
import capa.features.extractors.smda.file
|
||||
import capa.features.extractors.smda.insn
|
||||
import capa.features.extractors.smda.global_
|
||||
import capa.features.extractors.smda.function
|
||||
import capa.features.extractors.smda.basicblock
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
@@ -12,34 +14,40 @@ class SmdaFeatureExtractor(FeatureExtractor):
|
||||
super(SmdaFeatureExtractor, self).__init__()
|
||||
self.smda_report = smda_report
|
||||
self.path = path
|
||||
with open(self.path, "rb") as f:
|
||||
self.buf = f.read()
|
||||
|
||||
# pre-compute these because we'll yield them at *every* scope.
|
||||
self.global_features = []
|
||||
self.global_features.extend(capa.features.extractors.common.extract_os(self.buf))
|
||||
self.global_features.extend(capa.features.extractors.smda.global_.extract_arch(self.smda_report))
|
||||
|
||||
def get_base_address(self):
|
||||
return self.smda_report.base_addr
|
||||
|
||||
def extract_global_features(self):
|
||||
yield from self.global_features
|
||||
|
||||
def extract_file_features(self):
|
||||
for feature, va in capa.features.extractors.smda.file.extract_features(self.smda_report, self.path):
|
||||
yield feature, va
|
||||
yield from capa.features.extractors.smda.file.extract_features(self.smda_report, self.buf)
|
||||
|
||||
def get_functions(self):
|
||||
for function in self.smda_report.getFunctions():
|
||||
yield function
|
||||
|
||||
def extract_function_features(self, f):
|
||||
for feature, va in capa.features.extractors.smda.function.extract_features(f):
|
||||
yield feature, va
|
||||
yield from capa.features.extractors.smda.function.extract_features(f)
|
||||
|
||||
def get_basic_blocks(self, f):
|
||||
for bb in f.getBlocks():
|
||||
yield bb
|
||||
|
||||
def extract_basic_block_features(self, f, bb):
|
||||
for feature, va in capa.features.extractors.smda.basicblock.extract_features(f, bb):
|
||||
yield feature, va
|
||||
yield from capa.features.extractors.smda.basicblock.extract_features(f, bb)
|
||||
|
||||
def get_instructions(self, f, bb):
|
||||
for smda_ins in bb.getInstructions():
|
||||
yield smda_ins
|
||||
|
||||
def extract_insn_features(self, f, bb, insn):
|
||||
for feature, va in capa.features.extractors.smda.insn.extract_features(f, bb, insn):
|
||||
yield feature, va
|
||||
yield from capa.features.extractors.smda.insn.extract_features(f, bb, insn)
|
||||
|
||||
@@ -1,30 +1,29 @@
|
||||
# if we have SMDA we definitely have lief
|
||||
import lief
|
||||
|
||||
import capa.features.extractors.common
|
||||
import capa.features.extractors.helpers
|
||||
import capa.features.extractors.strings
|
||||
from capa.features.file import Export, Import, Section
|
||||
from capa.features.common import String, Characteristic
|
||||
|
||||
|
||||
def extract_file_embedded_pe(smda_report, file_path):
|
||||
with open(file_path, "rb") as f:
|
||||
fbytes = f.read()
|
||||
|
||||
for offset, i in capa.features.extractors.helpers.carve_pe(fbytes, 1):
|
||||
def extract_file_embedded_pe(buf, **kwargs):
|
||||
for offset, _ in capa.features.extractors.helpers.carve_pe(buf, 1):
|
||||
yield Characteristic("embedded pe"), offset
|
||||
|
||||
|
||||
def extract_file_export_names(smda_report, file_path):
|
||||
lief_binary = lief.parse(file_path)
|
||||
def extract_file_export_names(buf, **kwargs):
|
||||
lief_binary = lief.parse(buf)
|
||||
|
||||
if lief_binary is not None:
|
||||
for function in lief_binary.exported_functions:
|
||||
yield Export(function.name), function.address
|
||||
|
||||
|
||||
def extract_file_import_names(smda_report, file_path):
|
||||
def extract_file_import_names(smda_report, buf):
|
||||
# extract import table info via LIEF
|
||||
lief_binary = lief.parse(file_path)
|
||||
lief_binary = lief.parse(buf)
|
||||
if not isinstance(lief_binary, lief.PE.Binary):
|
||||
return
|
||||
for imported_library in lief_binary.imports:
|
||||
@@ -40,8 +39,8 @@ def extract_file_import_names(smda_report, file_path):
|
||||
yield Import(name), va
|
||||
|
||||
|
||||
def extract_file_section_names(smda_report, file_path):
|
||||
lief_binary = lief.parse(file_path)
|
||||
def extract_file_section_names(buf, **kwargs):
|
||||
lief_binary = lief.parse(buf)
|
||||
if not isinstance(lief_binary, lief.PE.Binary):
|
||||
return
|
||||
if lief_binary and lief_binary.sections:
|
||||
@@ -50,21 +49,18 @@ def extract_file_section_names(smda_report, file_path):
|
||||
yield Section(section.name), base_address + section.virtual_address
|
||||
|
||||
|
||||
def extract_file_strings(smda_report, file_path):
|
||||
def extract_file_strings(buf, **kwargs):
|
||||
"""
|
||||
extract ASCII and UTF-16 LE strings from file
|
||||
"""
|
||||
with open(file_path, "rb") as f:
|
||||
b = f.read()
|
||||
|
||||
for s in capa.features.extractors.strings.extract_ascii_strings(b):
|
||||
for s in capa.features.extractors.strings.extract_ascii_strings(buf):
|
||||
yield String(s.s), s.offset
|
||||
|
||||
for s in capa.features.extractors.strings.extract_unicode_strings(b):
|
||||
for s in capa.features.extractors.strings.extract_unicode_strings(buf):
|
||||
yield String(s.s), s.offset
|
||||
|
||||
|
||||
def extract_file_function_names(smda_report, file_path):
|
||||
def extract_file_function_names(smda_report, **kwargs):
|
||||
"""
|
||||
extract the names of statically-linked library functions.
|
||||
"""
|
||||
@@ -74,20 +70,24 @@ def extract_file_function_names(smda_report, file_path):
|
||||
return
|
||||
|
||||
|
||||
def extract_features(smda_report, file_path):
|
||||
def extract_file_format(buf, **kwargs):
|
||||
yield from capa.features.extractors.common.extract_format(buf)
|
||||
|
||||
|
||||
def extract_features(smda_report, buf):
|
||||
"""
|
||||
extract file features from given workspace
|
||||
|
||||
args:
|
||||
smda_report (smda.common.SmdaReport): a SmdaReport
|
||||
file_path: path to the input file
|
||||
buf: the raw bytes of the sample
|
||||
|
||||
yields:
|
||||
Tuple[Feature, VA]: a feature and its location.
|
||||
"""
|
||||
|
||||
for file_handler in FILE_HANDLERS:
|
||||
for feature, va in file_handler(smda_report, file_path):
|
||||
for feature, va in file_handler(smda_report=smda_report, buf=buf):
|
||||
yield feature, va
|
||||
|
||||
|
||||
@@ -98,4 +98,5 @@ FILE_HANDLERS = (
|
||||
extract_file_section_names,
|
||||
extract_file_strings,
|
||||
extract_file_function_names,
|
||||
extract_file_format,
|
||||
)
|
||||
|
||||
20
capa/features/extractors/smda/global_.py
Normal file
20
capa/features/extractors/smda/global_.py
Normal file
@@ -0,0 +1,20 @@
|
||||
import logging
|
||||
|
||||
from capa.features.common import ARCH_I386, ARCH_AMD64, Arch
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_arch(smda_report):
    """
    yield the CPU architecture feature described by an SMDA report.

    only the "intel" architecture with a bitness of 32 or 64 is recognized;
    any other combination is logged at debug level and yields nothing.

    args:
      smda_report (smda.common.SmdaReport): a SmdaReport

    yields:
      Tuple[Feature, VA]: the architecture feature, reported at offset 0x0.
    """
    if smda_report.architecture == "intel":
        if smda_report.bitness == 32:
            yield Arch(ARCH_I386), 0x0
        elif smda_report.bitness == 64:
            yield Arch(ARCH_AMD64), 0x0
        else:
            # previously this case fell through silently, which made a missing
            # arch feature hard to diagnose.
            logger.debug("unsupported bitness: %s", smda_report.bitness)
    else:
        # we likely end up here:
        #  1. handling a new architecture (e.g. aarch64)
        #
        # for (1), this logic will need to be updated as the format is implemented.
        logger.debug("unsupported architecture: %s", smda_report.architecture)
|
||||
@@ -7,8 +7,8 @@ from smda.common.SmdaReport import SmdaReport
|
||||
import capa.features.extractors.helpers
|
||||
from capa.features.insn import API, Number, Offset, Mnemonic
|
||||
from capa.features.common import (
|
||||
ARCH_X32,
|
||||
ARCH_X64,
|
||||
BITNESS_X32,
|
||||
BITNESS_X64,
|
||||
MAX_BYTES_FEATURE_SIZE,
|
||||
THUNK_CHAIN_DEPTH_DELTA,
|
||||
Bytes,
|
||||
@@ -23,12 +23,12 @@ PATTERN_HEXNUM = re.compile(r"[+\-] (?P<num>0x[a-fA-F0-9]+)")
|
||||
PATTERN_SINGLENUM = re.compile(r"[+\-] (?P<num>[0-9])")
|
||||
|
||||
|
||||
def get_arch(smda_report):
|
||||
def get_bitness(smda_report):
|
||||
if smda_report.architecture == "intel":
|
||||
if smda_report.bitness == 32:
|
||||
return ARCH_X32
|
||||
return BITNESS_X32
|
||||
elif smda_report.bitness == 64:
|
||||
return ARCH_X64
|
||||
return BITNESS_X64
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
@@ -85,7 +85,7 @@ def extract_insn_number_features(f, bb, insn):
|
||||
for operand in operands:
|
||||
try:
|
||||
yield Number(int(operand, 16)), insn.offset
|
||||
yield Number(int(operand, 16), arch=get_arch(f.smda_report)), insn.offset
|
||||
yield Number(int(operand, 16), bitness=get_bitness(f.smda_report)), insn.offset
|
||||
except:
|
||||
continue
|
||||
|
||||
@@ -228,7 +228,7 @@ def extract_insn_offset_features(f, bb, insn):
|
||||
number = int(number_int.group("num"))
|
||||
number = -1 * number if number_int.group().startswith("-") else number
|
||||
yield Offset(number), insn.offset
|
||||
yield Offset(number, arch=get_arch(f.smda_report)), insn.offset
|
||||
yield Offset(number, bitness=get_bitness(f.smda_report)), insn.offset
|
||||
|
||||
|
||||
def is_security_cookie(f, bb, insn):
|
||||
|
||||
@@ -10,8 +10,10 @@ import logging
|
||||
import viv_utils
|
||||
import viv_utils.flirt
|
||||
|
||||
import capa.features.extractors.common
|
||||
import capa.features.extractors.viv.file
|
||||
import capa.features.extractors.viv.insn
|
||||
import capa.features.extractors.viv.global_
|
||||
import capa.features.extractors.viv.function
|
||||
import capa.features.extractors.viv.basicblock
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
@@ -37,37 +39,43 @@ class VivisectFeatureExtractor(FeatureExtractor):
|
||||
super(VivisectFeatureExtractor, self).__init__()
|
||||
self.vw = vw
|
||||
self.path = path
|
||||
with open(self.path, "rb") as f:
|
||||
self.buf = f.read()
|
||||
|
||||
# pre-compute these because we'll yield them at *every* scope.
|
||||
self.global_features = []
|
||||
self.global_features.extend(capa.features.extractors.common.extract_os(self.buf))
|
||||
self.global_features.extend(capa.features.extractors.viv.global_.extract_arch(self.vw))
|
||||
|
||||
def get_base_address(self):
|
||||
# assume there is only one file loaded into the vw
|
||||
return list(self.vw.filemeta.values())[0]["imagebase"]
|
||||
|
||||
def extract_global_features(self):
|
||||
yield from self.global_features
|
||||
|
||||
def extract_file_features(self):
|
||||
for feature, va in capa.features.extractors.viv.file.extract_features(self.vw, self.path):
|
||||
yield feature, va
|
||||
yield from capa.features.extractors.viv.file.extract_features(self.vw, self.buf)
|
||||
|
||||
def get_functions(self):
|
||||
for va in sorted(self.vw.getFunctions()):
|
||||
yield viv_utils.Function(self.vw, va)
|
||||
|
||||
def extract_function_features(self, f):
|
||||
for feature, va in capa.features.extractors.viv.function.extract_features(f):
|
||||
yield feature, va
|
||||
yield from capa.features.extractors.viv.function.extract_features(f)
|
||||
|
||||
def get_basic_blocks(self, f):
|
||||
return f.basic_blocks
|
||||
|
||||
def extract_basic_block_features(self, f, bb):
|
||||
for feature, va in capa.features.extractors.viv.basicblock.extract_features(f, bb):
|
||||
yield feature, va
|
||||
yield from capa.features.extractors.viv.basicblock.extract_features(f, bb)
|
||||
|
||||
def get_instructions(self, f, bb):
|
||||
for insn in bb.instructions:
|
||||
yield InstructionHandle(insn)
|
||||
|
||||
def extract_insn_features(self, f, bb, insn):
|
||||
for feature, va in capa.features.extractors.viv.insn.extract_features(f, bb, insn):
|
||||
yield feature, va
|
||||
yield from capa.features.extractors.viv.insn.extract_features(f, bb, insn)
|
||||
|
||||
def is_library_function(self, va):
|
||||
return viv_utils.flirt.is_library_function(self.vw, va)
|
||||
|
||||
@@ -11,26 +11,24 @@ import viv_utils
|
||||
import viv_utils.flirt
|
||||
|
||||
import capa.features.insn
|
||||
import capa.features.extractors.common
|
||||
import capa.features.extractors.helpers
|
||||
import capa.features.extractors.strings
|
||||
from capa.features.file import Export, Import, Section, FunctionName
|
||||
from capa.features.common import String, Characteristic
|
||||
|
||||
|
||||
def extract_file_embedded_pe(vw, file_path):
|
||||
with open(file_path, "rb") as f:
|
||||
fbytes = f.read()
|
||||
|
||||
for offset, i in pe_carve.carve(fbytes, 1):
|
||||
def extract_file_embedded_pe(buf, **kwargs):
|
||||
for offset, _ in pe_carve.carve(buf, 1):
|
||||
yield Characteristic("embedded pe"), offset
|
||||
|
||||
|
||||
def extract_file_export_names(vw, file_path):
|
||||
for va, etype, name, _ in vw.getExports():
|
||||
def extract_file_export_names(vw, **kwargs):
|
||||
for va, _, name, _ in vw.getExports():
|
||||
yield Export(name), va
|
||||
|
||||
|
||||
def extract_file_import_names(vw, file_path):
|
||||
def extract_file_import_names(vw, **kwargs):
|
||||
"""
|
||||
extract imported function names
|
||||
1. imports by ordinal:
|
||||
@@ -41,7 +39,7 @@ def extract_file_import_names(vw, file_path):
|
||||
"""
|
||||
for va, _, _, tinfo in vw.getImports():
|
||||
# vivisect source: tinfo = "%s.%s" % (libname, impname)
|
||||
modname, impname = tinfo.split(".")
|
||||
modname, impname = tinfo.split(".", 1)
|
||||
if is_viv_ord_impname(impname):
|
||||
# replace ord prefix with #
|
||||
impname = "#%s" % impname[len("ord") :]
|
||||
@@ -64,26 +62,16 @@ def is_viv_ord_impname(impname: str) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def extract_file_section_names(vw, file_path):
|
||||
def extract_file_section_names(vw, **kwargs):
|
||||
for va, _, segname, _ in vw.getSegments():
|
||||
yield Section(segname), va
|
||||
|
||||
|
||||
def extract_file_strings(vw, file_path):
|
||||
"""
|
||||
extract ASCII and UTF-16 LE strings from file
|
||||
"""
|
||||
with open(file_path, "rb") as f:
|
||||
b = f.read()
|
||||
|
||||
for s in capa.features.extractors.strings.extract_ascii_strings(b):
|
||||
yield String(s.s), s.offset
|
||||
|
||||
for s in capa.features.extractors.strings.extract_unicode_strings(b):
|
||||
yield String(s.s), s.offset
|
||||
def extract_file_strings(buf, **kwargs):
|
||||
yield from capa.features.extractors.common.extract_file_strings(buf)
|
||||
|
||||
|
||||
def extract_file_function_names(vw, file_path):
|
||||
def extract_file_function_names(vw, **kwargs):
|
||||
"""
|
||||
extract the names of statically-linked library functions.
|
||||
"""
|
||||
@@ -93,20 +81,24 @@ def extract_file_function_names(vw, file_path):
|
||||
yield FunctionName(name), va
|
||||
|
||||
|
||||
def extract_features(vw, file_path):
|
||||
def extract_file_format(buf, **kwargs):
|
||||
yield from capa.features.extractors.common.extract_format(buf)
|
||||
|
||||
|
||||
def extract_features(vw, buf: bytes):
|
||||
"""
|
||||
extract file features from given workspace
|
||||
|
||||
args:
|
||||
vw (vivisect.VivWorkspace): the vivisect workspace
|
||||
file_path: path to the input file
|
||||
buf: the raw input file bytes
|
||||
|
||||
yields:
|
||||
Tuple[Feature, VA]: a feature and its location.
|
||||
"""
|
||||
|
||||
for file_handler in FILE_HANDLERS:
|
||||
for feature, va in file_handler(vw, file_path):
|
||||
for feature, va in file_handler(vw=vw, buf=buf): # type: ignore
|
||||
yield feature, va
|
||||
|
||||
|
||||
@@ -117,4 +109,5 @@ FILE_HANDLERS = (
|
||||
extract_file_section_names,
|
||||
extract_file_strings,
|
||||
extract_file_function_names,
|
||||
extract_file_format,
|
||||
)
|
||||
|
||||
24
capa/features/extractors/viv/global_.py
Normal file
24
capa/features/extractors/viv/global_.py
Normal file
@@ -0,0 +1,24 @@
|
||||
import logging
|
||||
|
||||
import envi.archs.i386
|
||||
import envi.archs.amd64
|
||||
|
||||
from capa.features.common import ARCH_I386, ARCH_AMD64, Arch
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_arch(vw):
    """
    yield the CPU architecture feature for a vivisect workspace.

    args:
      vw (vivisect.VivWorkspace): the vivisect workspace

    yields:
      Tuple[Feature, VA]: the architecture feature, reported at offset 0x0.
        nothing is yielded (and a debug message is logged) when the workspace's
        architecture module is neither the amd64 nor the i386 one.
    """
    arch_module = vw.arch
    supported = (envi.archs.amd64.Amd64Module, envi.archs.i386.i386Module)

    if not isinstance(arch_module, supported):
        # we likely end up here:
        #  1. handling a new architecture (e.g. aarch64)
        #
        # for (1), this logic will need to be updated as the format is implemented.
        logger.debug("unsupported architecture: %s", arch_module.__class__.__name__)
        return

    # amd64 is tested before i386, as in the original ordering.
    # NOTE(review): presumably the amd64 module derives from the i386 one, which
    # would make this order significant - confirm before reordering.
    if isinstance(arch_module, envi.archs.amd64.Amd64Module):
        yield Arch(ARCH_AMD64), 0x0
    else:
        yield Arch(ARCH_I386), 0x0
|
||||
@@ -19,8 +19,8 @@ import capa.features.extractors.helpers
|
||||
import capa.features.extractors.viv.helpers
|
||||
from capa.features.insn import API, Number, Offset, Mnemonic
|
||||
from capa.features.common import (
|
||||
ARCH_X32,
|
||||
ARCH_X64,
|
||||
BITNESS_X32,
|
||||
BITNESS_X64,
|
||||
MAX_BYTES_FEATURE_SIZE,
|
||||
THUNK_CHAIN_DEPTH_DELTA,
|
||||
Bytes,
|
||||
@@ -34,12 +34,12 @@ from capa.features.extractors.viv.indirect_calls import NotFoundError, resolve_i
|
||||
SECURITY_COOKIE_BYTES_DELTA = 0x40
|
||||
|
||||
|
||||
def get_arch(vw):
|
||||
arch = vw.getMeta("Architecture")
|
||||
if arch == "i386":
|
||||
return ARCH_X32
|
||||
elif arch == "amd64":
|
||||
return ARCH_X64
|
||||
def get_bitness(vw):
|
||||
bitness = vw.getMeta("Architecture")
|
||||
if bitness == "i386":
|
||||
return BITNESS_X32
|
||||
elif bitness == "amd64":
|
||||
return BITNESS_X64
|
||||
|
||||
|
||||
def interface_extract_instruction_XXX(f, bb, insn):
|
||||
@@ -127,6 +127,10 @@ def extract_insn_api_features(f, bb, insn):
|
||||
for name in capa.features.extractors.helpers.generate_symbols(dll, symbol):
|
||||
yield API(name), insn.va
|
||||
|
||||
# if jump leads to an ENDBRANCH instruction, skip it
|
||||
if f.vw.getByteDef(target)[1].startswith(b"\xf3\x0f\x1e"):
|
||||
target += 4
|
||||
|
||||
target = capa.features.extractors.viv.helpers.get_coderef_from(f.vw, target)
|
||||
if not target:
|
||||
return
|
||||
@@ -189,7 +193,7 @@ def extract_insn_number_features(f, bb, insn):
|
||||
return
|
||||
|
||||
yield Number(v), insn.va
|
||||
yield Number(v, arch=get_arch(f.vw)), insn.va
|
||||
yield Number(v, bitness=get_bitness(f.vw)), insn.va
|
||||
|
||||
|
||||
def derefs(vw, p):
|
||||
@@ -385,7 +389,7 @@ def extract_insn_offset_features(f, bb, insn):
|
||||
v = oper.disp
|
||||
|
||||
yield Offset(v), insn.va
|
||||
yield Offset(v, arch=get_arch(f.vw)), insn.va
|
||||
yield Offset(v, bitness=get_bitness(f.vw)), insn.va
|
||||
|
||||
# like: [esi + ecx + 16384]
|
||||
# reg ^ ^
|
||||
@@ -396,7 +400,7 @@ def extract_insn_offset_features(f, bb, insn):
|
||||
v = oper.disp
|
||||
|
||||
yield Offset(v), insn.va
|
||||
yield Offset(v, arch=get_arch(f.vw)), insn.va
|
||||
yield Offset(v, bitness=get_bitness(f.vw)), insn.va
|
||||
|
||||
|
||||
def is_security_cookie(f, bb, insn) -> bool:
|
||||
|
||||
@@ -19,6 +19,10 @@ json format:
|
||||
...
|
||||
},
|
||||
'scopes': {
|
||||
'global': [
|
||||
(str(name), [any(arg), ...], int(va), ()),
|
||||
...
|
||||
],
|
||||
'file': [
|
||||
(str(name), [any(arg), ...], int(va), ()),
|
||||
...
|
||||
@@ -52,7 +56,6 @@ See the License for the specific language governing permissions and limitations
|
||||
import json
|
||||
import zlib
|
||||
import logging
|
||||
import os.path
|
||||
|
||||
import capa.features.file
|
||||
import capa.features.insn
|
||||
@@ -91,12 +94,15 @@ def dumps(extractor):
|
||||
"base address": extractor.get_base_address(),
|
||||
"functions": {},
|
||||
"scopes": {
|
||||
"global": [],
|
||||
"file": [],
|
||||
"function": [],
|
||||
"basic block": [],
|
||||
"instruction": [],
|
||||
},
|
||||
}
|
||||
for feature, va in extractor.extract_global_features():
|
||||
ret["scopes"]["global"].append(serialize_feature(feature) + (hex(va), ()))
|
||||
|
||||
for feature, va in extractor.extract_file_features():
|
||||
ret["scopes"]["file"].append(serialize_feature(feature) + (hex(va), ()))
|
||||
@@ -151,6 +157,7 @@ def loads(s):
|
||||
|
||||
features = {
|
||||
"base address": doc.get("base address"),
|
||||
"global features": [],
|
||||
"file features": [],
|
||||
"functions": {},
|
||||
}
|
||||
@@ -180,6 +187,12 @@ def loads(s):
|
||||
# ('MatchedRule', ('foo', ), '0x401000', ('0x401000', ))
|
||||
# ^^^^^^^^^^^^^ ^^^^^^^^^ ^^^^^^^^^^ ^^^^^^^^^^^^^^
|
||||
# feature name args addr func/bb/insn
|
||||
for feature in doc.get("scopes", {}).get("global", []):
|
||||
va, loc = feature[2:]
|
||||
va = int(va, 0x10)
|
||||
feature = deserialize_feature(feature[:2])
|
||||
features["global features"].append((va, feature))
|
||||
|
||||
for feature in doc.get("scopes", {}).get("file", []):
|
||||
va, loc = feature[2:]
|
||||
va = int(va, 0x10)
|
||||
|
||||
@@ -21,16 +21,16 @@ class API(Feature):
|
||||
|
||||
|
||||
class Number(Feature):
|
||||
def __init__(self, value: int, arch=None, description=None):
|
||||
super(Number, self).__init__(value, arch=arch, description=description)
|
||||
def __init__(self, value: int, bitness=None, description=None):
|
||||
super(Number, self).__init__(value, bitness=bitness, description=description)
|
||||
|
||||
def get_value_str(self):
|
||||
return capa.render.utils.hex(self.value)
|
||||
|
||||
|
||||
class Offset(Feature):
|
||||
def __init__(self, value: int, arch=None, description=None):
|
||||
super(Offset, self).__init__(value, arch=arch, description=description)
|
||||
def __init__(self, value: int, bitness=None, description=None):
|
||||
super(Offset, self).__init__(value, bitness=bitness, description=description)
|
||||
|
||||
def get_value_str(self):
|
||||
return capa.render.utils.hex(self.value)
|
||||
|
||||
@@ -21,3 +21,12 @@ def get_file_taste(sample_path: str) -> bytes:
|
||||
with open(sample_path, "rb") as f:
|
||||
taste = f.read(8)
|
||||
return taste
|
||||
|
||||
|
||||
def is_runtime_ida():
    """
    report whether the `idc` module can be imported in the current interpreter.

    returns:
      bool: True if `import idc` succeeds, False if it raises ImportError.
    """
    importable = True
    try:
        import idc  # noqa: F401 - imported only to probe for availability
    except ImportError:
        importable = False
    return importable
|
||||
|
||||
@@ -12,6 +12,8 @@ import datetime
|
||||
import idc
|
||||
import idaapi
|
||||
import idautils
|
||||
import ida_bytes
|
||||
import ida_loader
|
||||
|
||||
import capa
|
||||
import capa.version
|
||||
@@ -19,21 +21,23 @@ import capa.features.common
|
||||
|
||||
logger = logging.getLogger("capa")
|
||||
|
||||
SUPPORTED_IDA_VERSIONS = [
|
||||
"7.1",
|
||||
"7.2",
|
||||
"7.3",
|
||||
# IDA version as returned by idaapi.get_kernel_version()
|
||||
SUPPORTED_IDA_VERSIONS = (
|
||||
"7.4",
|
||||
"7.5",
|
||||
"7.6",
|
||||
]
|
||||
)
|
||||
|
||||
# file type names as returned by idaapi.get_file_type_name()
|
||||
SUPPORTED_FILE_TYPES = [
|
||||
"Portable executable for 80386 (PE)",
|
||||
"Portable executable for AMD64 (PE)",
|
||||
"Binary file", # x86/AMD64 shellcode support
|
||||
]
|
||||
# file type as returned by idainfo.file_type
|
||||
SUPPORTED_FILE_TYPES = (
|
||||
idaapi.f_PE,
|
||||
idaapi.f_ELF,
|
||||
idaapi.f_BIN,
|
||||
# idaapi.f_MACHO,
|
||||
)
|
||||
|
||||
# arch type as returned by idainfo.procname
|
||||
SUPPORTED_ARCH_TYPES = ("metapc",)
|
||||
|
||||
|
||||
def inform_user_ida_ui(message):
|
||||
@@ -53,13 +57,13 @@ def is_supported_ida_version():
|
||||
|
||||
|
||||
def is_supported_file_type():
|
||||
file_type = idaapi.get_file_type_name()
|
||||
if file_type not in SUPPORTED_FILE_TYPES:
|
||||
file_info = idaapi.get_inf_structure()
|
||||
if file_info.filetype not in SUPPORTED_FILE_TYPES:
|
||||
logger.error("-" * 80)
|
||||
logger.error(" Input file does not appear to be a PE file.")
|
||||
logger.error(" Input file does not appear to be a supported file type.")
|
||||
logger.error(" ")
|
||||
logger.error(
|
||||
" capa currently only supports analyzing PE files (or binary files containing x86/AMD64 shellcode) with IDA."
|
||||
" capa currently only supports analyzing PE, ELF, or binary files containing x86 (32- and 64-bit) shellcode."
|
||||
)
|
||||
logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
|
||||
logger.error("-" * 80)
|
||||
@@ -67,6 +71,18 @@ def is_supported_file_type():
|
||||
return True
|
||||
|
||||
|
||||
def is_supported_arch_type():
    """
    check that the currently open IDA database targets a supported architecture.

    the processor name must be listed in SUPPORTED_ARCH_TYPES and the database
    must report itself as either 32-bit or 64-bit. when the check fails, an
    explanatory banner is written to the log at error level.

    returns:
      bool: True if the architecture is supported, False otherwise.
    """
    inf = idaapi.get_inf_structure()

    if inf.procname in SUPPORTED_ARCH_TYPES:
        # both queries are made before the result is inspected
        is_32bit, is_64bit = inf.is_32bit(), inf.is_64bit()
        if is_32bit or is_64bit:
            return True

    banner = (
        "-" * 80,
        " Input file does not appear to target a supported architecture.",
        " ",
        " capa currently only supports analyzing x86 (32- and 64-bit).",
        "-" * 80,
    )
    for line in banner:
        logger.error(line)
    return False
|
||||
|
||||
|
||||
def get_disasm_line(va):
|
||||
""" """
|
||||
return idc.generate_disasm_line(va, idc.GENDSM_FORCE_CODE)
|
||||
@@ -121,3 +137,30 @@ def collect_metadata():
|
||||
},
|
||||
"version": capa.version.__version__,
|
||||
}
|
||||
|
||||
|
||||
class IDAIO:
    """
    An object that acts as a file-like object,
    using bytes from the current IDB workspace.

    Only the small subset of the file protocol implemented below is available:
    absolute `seek`, `read`, and a no-op `close`.
    """

    def __init__(self):
        super().__init__()
        # file offset that the next `read` starts from
        self.offset = 0

    def seek(self, offset, whence=0):
        """
        set the current file offset.

        only absolute positioning (`whence == 0`) is supported;
        any other value fails the assertion.
        """
        assert whence == 0
        self.offset = offset

    def read(self, size):
        """
        read `size` bytes starting at the current file offset.

        the file offset is translated to an address in the database first.
        note that the current offset is not advanced by a read.
        """
        file_offset = self.offset
        mapped = ida_loader.get_fileregion_ea(file_offset)
        # best guess, such as if file is mapped at address 0x0.
        ea = file_offset if mapped == idc.BADADDR else mapped

        logger.debug("reading 0x%x bytes at 0x%x (ea: 0x%x)", size, file_offset, ea)
        return ida_bytes.get_bytes(ea, size)

    def close(self):
        """do nothing; present so the object can be used where a file is expected."""
        return None
|
||||
|
||||
@@ -34,12 +34,26 @@ For more information on the FLARE team's open-source framework, capa, check out
|
||||
|
||||
### Requirements
|
||||
|
||||
capa explorer supports Python >= 3.6 and the following IDA Pro versions:
|
||||
capa explorer supports Python versions >= 3.6.x and the following IDA Pro versions:
|
||||
|
||||
* IDA 7.4
|
||||
* IDA 7.5
|
||||
* IDA 7.6 (caveat below)
|
||||
|
||||
capa explorer is, however, limited to the Python versions supported by your IDA installation (which may not include all Python versions >= 3.6.x). Based on our testing, the following matrix shows the Python versions supported
|
||||
by each supported IDA version:
|
||||
|
||||
| | IDA 7.4 | IDA 7.5 | IDA 7.6 |
|
||||
| --- | --- | --- | --- |
|
||||
| Python 3.6.x | Yes | Yes | Yes |
|
||||
| Python 3.7.x | Yes | Yes | Yes |
|
||||
| Python 3.8.x | Partial (see below) | Yes | Yes |
|
||||
| Python 3.9.x | No | Partial (see below) | Yes |
|
||||
|
||||
To use capa explorer with IDA 7.4 and Python 3.8.x you must follow the instructions provided by hex-rays [here](https://hex-rays.com/blog/ida-7-4-and-python-3-8/).
|
||||
|
||||
To use capa explorer with IDA 7.5 and Python 3.9.x you must follow the instructions provided by hex-rays [here](https://hex-rays.com/blog/python-3-9-support-for-ida-7-5/).
|
||||
|
||||
If you encounter issues with your specific setup, please open a new [Issue](https://github.com/fireeye/capa/issues).
|
||||
|
||||
#### IDA 7.6 caveat: IDA 7.6sp1 or patch required
|
||||
@@ -61,8 +75,8 @@ Therefore, in order to use capa under IDA 7.6 you need the [Service Pack 1 for I
|
||||
|
||||
capa explorer is limited to the file types supported by capa, which include:
|
||||
|
||||
* Windows 32-bit and 64-bit PE files
|
||||
* Windows 32-bit and 64-bit shellcode
|
||||
* Windows x86 (32- and 64-bit) PE and ELF files
|
||||
* Windows x86 (32- and 64-bit) shellcode
|
||||
|
||||
### Installation
|
||||
|
||||
|
||||
@@ -47,6 +47,8 @@ class CapaExplorerPlugin(idaapi.plugin_t):
|
||||
return idaapi.PLUGIN_SKIP
|
||||
if not capa.ida.helpers.is_supported_file_type():
|
||||
return idaapi.PLUGIN_SKIP
|
||||
if not capa.ida.helpers.is_supported_arch_type():
|
||||
return idaapi.PLUGIN_SKIP
|
||||
return idaapi.PLUGIN_OK
|
||||
|
||||
def term(self):
|
||||
|
||||
@@ -54,8 +54,6 @@ class Options(IntFlag):
|
||||
|
||||
def write_file(path, data):
|
||||
""" """
|
||||
if os.path.exists(path) and 1 != idaapi.ask_yn(1, "The file already exists. Overwrite?"):
|
||||
return
|
||||
with open(path, "wb") as save_file:
|
||||
save_file.write(data)
|
||||
|
||||
@@ -277,6 +275,7 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
self.view_rulegen_editor = None
|
||||
self.view_rulegen_header_label = None
|
||||
self.view_rulegen_search = None
|
||||
self.view_rulegen_limit_features_by_ea = None
|
||||
self.rulegen_current_function = None
|
||||
self.rulegen_bb_features_cache = {}
|
||||
self.rulegen_func_features_cache = {}
|
||||
@@ -467,6 +466,10 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
label2.setText("Editor")
|
||||
label2.setFont(font)
|
||||
|
||||
self.view_rulegen_limit_features_by_ea = QtWidgets.QCheckBox("Limit features to current dissasembly address")
|
||||
self.view_rulegen_limit_features_by_ea.setChecked(False)
|
||||
self.view_rulegen_limit_features_by_ea.stateChanged.connect(self.slot_checkbox_limit_features_by_ea)
|
||||
|
||||
self.view_rulegen_status_label = QtWidgets.QLabel()
|
||||
self.view_rulegen_status_label.setAlignment(QtCore.Qt.AlignLeft)
|
||||
self.view_rulegen_status_label.setText("")
|
||||
@@ -497,6 +500,7 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
layout3.addWidget(self.view_rulegen_editor, 65)
|
||||
|
||||
layout2.addWidget(self.view_rulegen_header_label)
|
||||
layout2.addWidget(self.view_rulegen_limit_features_by_ea)
|
||||
layout2.addWidget(self.view_rulegen_search)
|
||||
layout2.addWidget(self.view_rulegen_features)
|
||||
|
||||
@@ -561,6 +565,10 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
self.limit_results_to_function(idaapi.get_func(ea))
|
||||
self.view_tree.reset_ui()
|
||||
|
||||
def update_rulegen_tree_limit_features_to_selection(self, ea):
|
||||
""" """
|
||||
self.view_rulegen_features.filter_items_by_ea(ea)
|
||||
|
||||
def ida_hook_screen_ea_changed(self, widget, new_ea, old_ea):
|
||||
"""function hook for IDA "screen ea changed" action
|
||||
|
||||
@@ -581,6 +589,9 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
if not idaapi.get_func(new_ea):
|
||||
return
|
||||
|
||||
if self.view_tabs.currentIndex() == 1 and self.view_rulegen_limit_features_by_ea.isChecked():
|
||||
return self.update_rulegen_tree_limit_features_to_selection(new_ea)
|
||||
|
||||
if idaapi.get_func(new_ea) == idaapi.get_func(old_ea):
|
||||
# user navigated same function - ignore
|
||||
return
|
||||
@@ -982,6 +993,7 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
self.view_rulegen_editor.reset_view()
|
||||
self.view_rulegen_preview.reset_view()
|
||||
self.view_rulegen_search.clear()
|
||||
self.view_rulegen_limit_features_by_ea.setChecked(False)
|
||||
self.set_rulegen_preview_border_neutral()
|
||||
self.rulegen_current_function = None
|
||||
self.rulegen_func_features_cache = {}
|
||||
@@ -1020,7 +1032,7 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
|
||||
def update_rule_status(self, rule_text):
|
||||
""" """
|
||||
if self.view_rulegen_editor.root is None:
|
||||
if not self.view_rulegen_editor.invisibleRootItem().childCount():
|
||||
self.set_rulegen_preview_border_neutral()
|
||||
self.view_rulegen_status_label.clear()
|
||||
return
|
||||
@@ -1140,7 +1152,7 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
|
||||
s = json.dumps(self.doc, sort_keys=True, cls=capa.render.json.CapaJsonObjectEncoder).encode("utf-8")
|
||||
|
||||
path = idaapi.ask_file(True, "*.json", "Choose file to save capa program analysis JSON")
|
||||
path = self.ask_user_capa_json_file()
|
||||
if not path:
|
||||
return
|
||||
|
||||
@@ -1173,6 +1185,13 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
|
||||
self.view_tree.reset_ui()
|
||||
|
||||
def slot_checkbox_limit_features_by_ea(self, state):
    """Slot for the "limit features" checkbox in the rule generator view.

    Checking the box restricts the features tree to the address currently
    shown in the disassembly view; any other state shows every feature again.

    Args:
      state: Qt check state delivered by the checkbox's stateChanged signal.
    """
    if state != QtCore.Qt.Checked:
        self.view_rulegen_features.show_all_items()
        return

    self.view_rulegen_features.filter_items_by_ea(idaapi.get_screen_ea())
|
||||
|
||||
def slot_checkbox_show_results_by_function_changed(self, state):
|
||||
"""slot activated if checkbox clicked
|
||||
|
||||
@@ -1216,7 +1235,16 @@ class CapaExplorerForm(idaapi.PluginForm):
|
||||
def ask_user_capa_rule_file(self):
|
||||
""" """
|
||||
return QtWidgets.QFileDialog.getSaveFileName(
|
||||
None, "Please select a capa rule to edit", settings.user.get(CAPA_SETTINGS_RULE_PATH, ""), "*.yml"
|
||||
None,
|
||||
"Please select a location to save capa rule file",
|
||||
settings.user.get(CAPA_SETTINGS_RULE_PATH, ""),
|
||||
"*.yml",
|
||||
)[0]
|
||||
|
||||
def ask_user_capa_json_file(self):
|
||||
""" """
|
||||
return QtWidgets.QFileDialog.getSaveFileName(
|
||||
None, "Please select a location to save capa JSON file", "", "*.json"
|
||||
)[0]
|
||||
|
||||
def set_view_status_label(self, text):
|
||||
|
||||
@@ -435,12 +435,18 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
for ea in rule["matches"].keys():
|
||||
ea = capa.ida.helpers.get_func_start_ea(ea)
|
||||
if ea is None:
|
||||
# file scope, skip for rendering in this mode
|
||||
# file scope, skip rendering in this mode
|
||||
continue
|
||||
if None is matches_by_function.get(ea, None):
|
||||
matches_by_function[ea] = CapaExplorerFunctionItem(self.root_node, ea, can_check=False)
|
||||
if not matches_by_function.get(ea, ()):
|
||||
# new function root
|
||||
matches_by_function[ea] = (CapaExplorerFunctionItem(self.root_node, ea, can_check=False), [])
|
||||
function_root, match_cache = matches_by_function[ea]
|
||||
if rule["meta"]["name"] in match_cache:
|
||||
# rule match already rendered for this function root, skip it
|
||||
continue
|
||||
match_cache.append(rule["meta"]["name"])
|
||||
CapaExplorerRuleItem(
|
||||
matches_by_function[ea],
|
||||
function_root,
|
||||
rule["meta"]["name"],
|
||||
rule["meta"].get("namespace"),
|
||||
len(rule["matches"]),
|
||||
@@ -556,7 +562,7 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
parent, display, source=doc["rules"].get(feature[feature["type"]], {}).get("source", "")
|
||||
)
|
||||
|
||||
if feature["type"] == "regex":
|
||||
if feature["type"] in ("regex", "substring"):
|
||||
for s, locations in feature["matches"].items():
|
||||
if location in locations:
|
||||
return CapaExplorerStringViewItem(
|
||||
@@ -597,6 +603,9 @@ class CapaExplorerDataModel(QtCore.QAbstractItemModel):
|
||||
# display no preview
|
||||
return CapaExplorerFeatureItem(parent, location=location, display=display)
|
||||
|
||||
if feature["type"] in ("arch", "os", "format"):
|
||||
return CapaExplorerFeatureItem(parent, display=display)
|
||||
|
||||
raise RuntimeError("unexpected feature type: " + str(feature["type"]))
|
||||
|
||||
def update_function_name(self, old_name, new_name):
|
||||
|
||||
@@ -9,6 +9,7 @@ import re
|
||||
from collections import Counter
|
||||
|
||||
import idc
|
||||
import idaapi
|
||||
from PyQt5 import QtGui, QtCore, QtWidgets
|
||||
|
||||
import capa.rules
|
||||
@@ -26,7 +27,7 @@ COLOR_GREEN_RGB = (79, 121, 66)
|
||||
COLOR_BLUE_RGB = (37, 147, 215)
|
||||
|
||||
|
||||
def calc_level_by_indent(line, prev_level=0):
|
||||
def calc_indent_from_line(line, prev_level=0):
|
||||
""" """
|
||||
if not len(line.strip()):
|
||||
# blank line, which may occur for comments so we simply use the last level
|
||||
@@ -36,10 +37,13 @@ def calc_level_by_indent(line, prev_level=0):
|
||||
# need to adjust two spaces when encountering string description
|
||||
line = line[2:]
|
||||
# calc line level based on preceding whitespace
|
||||
return len(line) - len(stripped)
|
||||
indent = len(line) - len(stripped)
|
||||
|
||||
# round up to nearest even number; helps keep parsing more sane
|
||||
return indent + (indent % 2)
|
||||
|
||||
|
||||
def parse_feature_for_node(feature):
|
||||
def parse_yaml_line(feature):
|
||||
""" """
|
||||
description = ""
|
||||
comment = ""
|
||||
@@ -112,30 +116,6 @@ def parse_node_for_feature(feature, description, comment, depth):
|
||||
return display if display.endswith("\n") else display + "\n"
|
||||
|
||||
|
||||
def yaml_to_nodes(s):
|
||||
level = 0
|
||||
for line in s.splitlines():
|
||||
feature, description, comment = parse_feature_for_node(line.strip())
|
||||
|
||||
o = QtWidgets.QTreeWidgetItem(None)
|
||||
|
||||
# set node attributes
|
||||
setattr(o, "capa_level", calc_level_by_indent(line, level))
|
||||
|
||||
if feature.startswith(("- and:", "- or:", "- not:", "- basic block:", "- optional:")):
|
||||
setattr(o, "capa_type", CapaExplorerRulgenEditor.get_node_type_expression())
|
||||
elif feature.startswith("#"):
|
||||
setattr(o, "capa_type", CapaExplorerRulgenEditor.get_node_type_comment())
|
||||
else:
|
||||
setattr(o, "capa_type", CapaExplorerRulgenEditor.get_node_type_feature())
|
||||
|
||||
# set node text
|
||||
for (i, v) in enumerate((feature, description, comment)):
|
||||
o.setText(i, v)
|
||||
|
||||
yield o
|
||||
|
||||
|
||||
def iterate_tree(o):
|
||||
""" """
|
||||
itr = QtWidgets.QTreeWidgetItemIterator(o)
|
||||
@@ -144,6 +124,13 @@ def iterate_tree(o):
|
||||
itr += 1
|
||||
|
||||
|
||||
def expand_tree(root):
    """Expand every collapsed node that has children, walking the tree from `root`.

    Nodes without children and nodes that are already expanded are left untouched.
    """
    collapsed_parents = (item for item in iterate_tree(root) if item.childCount() and not item.isExpanded())
    for item in collapsed_parents:
        item.setExpanded(True)
|
||||
|
||||
|
||||
def calc_item_depth(o):
|
||||
""" """
|
||||
depth = 0
|
||||
@@ -178,6 +165,13 @@ def build_context_menu(o, actions):
|
||||
return menu
|
||||
|
||||
|
||||
def resize_columns_to_content(header):
    """Fit all sections of `header` to their contents, capping the first one.

    After the automatic resize, the first section is shrunk back to
    MAX_SECTION_SIZE if it grew beyond that width.
    """
    header.resizeSections(QtWidgets.QHeaderView.ResizeToContents)

    first_section_too_wide = header.sectionSize(0) > MAX_SECTION_SIZE
    if first_section_too_wide:
        header.resizeSection(0, MAX_SECTION_SIZE)
|
||||
|
||||
|
||||
class CapaExplorerRulgenPreview(QtWidgets.QTextEdit):
|
||||
|
||||
INDENT = " " * 2
|
||||
@@ -319,7 +313,6 @@ class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
|
||||
self.preview = preview
|
||||
|
||||
self.setHeaderLabels(["Feature", "Description", "Comment"])
|
||||
self.header().setSectionResizeMode(QtWidgets.QHeaderView.ResizeToContents)
|
||||
self.header().setStretchLastSection(False)
|
||||
self.setExpandsOnDoubleClick(False)
|
||||
self.setEditTriggers(QtWidgets.QAbstractItemView.NoEditTriggers)
|
||||
@@ -327,6 +320,10 @@ class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
|
||||
self.setSelectionMode(QtWidgets.QAbstractItemView.ExtendedSelection)
|
||||
self.setStyleSheet("QTreeView::item {padding-right: 15 px;padding-bottom: 2 px;}")
|
||||
|
||||
# configure view columns to auto-resize
|
||||
for idx in range(3):
|
||||
self.header().setSectionResizeMode(idx, QtWidgets.QHeaderView.Interactive)
|
||||
|
||||
# enable drag and drop
|
||||
self.setDragEnabled(True)
|
||||
self.setAcceptDrops(True)
|
||||
@@ -336,8 +333,9 @@ class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
|
||||
self.itemChanged.connect(self.slot_item_changed)
|
||||
self.customContextMenuRequested.connect(self.slot_custom_context_menu_requested)
|
||||
self.itemDoubleClicked.connect(self.slot_item_double_clicked)
|
||||
self.expanded.connect(self.slot_resize_columns_to_content)
|
||||
self.collapsed.connect(self.slot_resize_columns_to_content)
|
||||
|
||||
self.root = None
|
||||
self.reset_view()
|
||||
|
||||
self.is_editing = False
|
||||
@@ -387,15 +385,17 @@ class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
|
||||
|
||||
super(CapaExplorerRulgenEditor, self).dropEvent(e)
|
||||
|
||||
# self.prune_expressions()
|
||||
self.update_preview()
|
||||
self.expandAll()
|
||||
expand_tree(self.invisibleRootItem())
|
||||
|
||||
def reset_view(self):
|
||||
""" """
|
||||
self.root = None
|
||||
self.clear()
|
||||
|
||||
def slot_resize_columns_to_content(self):
|
||||
""" """
|
||||
resize_columns_to_content(self.header())
|
||||
|
||||
def slot_item_changed(self, item, column):
|
||||
""" """
|
||||
if self.is_editing:
|
||||
@@ -405,16 +405,21 @@ class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
|
||||
def slot_remove_selected(self, action):
|
||||
""" """
|
||||
for o in self.selectedItems():
|
||||
if o == self.root:
|
||||
if o.parent() is None:
|
||||
# special handling for top-level items
|
||||
self.takeTopLevelItem(self.indexOfTopLevelItem(o))
|
||||
self.root = None
|
||||
continue
|
||||
o.parent().removeChild(o)
|
||||
|
||||
def slot_nest_features(self, action):
|
||||
""" """
|
||||
# create a new parent under root node, by default; new node added last position in tree
|
||||
new_parent = self.new_expression_node(self.root, (action.data()[0], ""))
|
||||
# we don't want to add new features under the invisible root because capa rules should
|
||||
# contain a single top-level node; this may not always be the case so we default to the last
|
||||
# child node that was added to the invisible root
|
||||
top_node = self.invisibleRootItem().child(self.invisibleRootItem().childCount() - 1)
|
||||
|
||||
# create a new parent under top-level node
|
||||
new_parent = self.new_expression_node(top_node, (action.data()[0], ""))
|
||||
|
||||
if "basic block" in action.data()[0]:
|
||||
# add default child expression when nesting under basic block
|
||||
@@ -616,9 +621,14 @@ class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
|
||||
|
||||
def update_features(self, features):
|
||||
""" """
|
||||
if not self.root:
|
||||
# root node does not exist, create default node, set expanded
|
||||
self.root = self.new_expression_node(self, ("- or:", ""))
|
||||
if not self.invisibleRootItem().childCount():
|
||||
# empty tree; add a default node
|
||||
self.new_expression_node(self.invisibleRootItem(), ("- or:", ""))
|
||||
|
||||
# we don't want to add new features under the invisible root because capa rules should
|
||||
# contain a single top-level node; this may not always be the case so we default to the last
|
||||
# child node that was added to the invisible root
|
||||
top_node = self.invisibleRootItem().child(self.invisibleRootItem().childCount() - 1)
|
||||
|
||||
# build feature counts
|
||||
counted = list(zip(Counter(features).keys(), Counter(features).values()))
|
||||
@@ -629,7 +639,7 @@ class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
|
||||
value = '"%s"' % capa.features.common.escape_string(k.get_value_str())
|
||||
else:
|
||||
value = k.get_value_str()
|
||||
self.new_feature_node(self.root, ("- %s: %s" % (k.name.lower(), value), ""))
|
||||
self.new_feature_node(top_node, ("- %s: %s" % (k.name.lower(), value), ""))
|
||||
|
||||
# n > 1 features
|
||||
for (k, v) in filter(lambda t: t[1] > 1, counted):
|
||||
@@ -641,66 +651,108 @@ class CapaExplorerRulgenEditor(QtWidgets.QTreeWidget):
|
||||
display = "- count(%s(%s)): %d" % (k.name.lower(), value, v)
|
||||
else:
|
||||
display = "- count(%s): %d" % (k.name.lower(), v)
|
||||
self.new_feature_node(self.root, (display, ""))
|
||||
self.new_feature_node(top_node, (display, ""))
|
||||
|
||||
self.expandAll()
|
||||
self.update_preview()
|
||||
expand_tree(self.invisibleRootItem())
|
||||
resize_columns_to_content(self.header())
|
||||
|
||||
def make_child_node_from_feature(self, parent, feature):
    """Create an editor tree node for one parsed rule line and attach it to `parent`.

    Description lines ("description: ..." and "- description: ...") do not get a
    node of their own; their text is written into the "Description" column of an
    existing node instead.

    Args:
      parent: tree item that receives the new node (or the description text).
      feature: 3-tuple of column texts in view column order
        (feature, description, comment).

    Returns:
      The newly created tree item, or None when the line was a description
      (or a description without a parent) and no node was created.
    """
    feature, description, comment = feature

    # we need special handling for the "description" tag; meaning we don't add a new node but simply
    # set the "description" column for the appropriate parent node
    #
    # NOTE: the prefix is removed by slicing; str.lstrip() takes a *set of characters*, not a
    # prefix, and would also eat leading characters of the description text itself
    if feature.startswith("description:"):
        if not parent:
            # we shouldn't have description without a parent; do nothing
            return None

        text = feature[len("description:") :].lstrip()

        # we don't add a new node for description; either set description column of parent's last child
        # or the parent itself
        if parent.childCount():
            parent.child(parent.childCount() - 1).setText(1, text)
        else:
            parent.setText(1, text)
        return None
    elif feature.startswith("- description:"):
        if not parent:
            # we shouldn't have a description without a parent; do nothing
            return None

        # we don't add a new node for description; set the description column of the parent instead
        parent.setText(1, feature[len("- description:") :].lstrip())
        return None

    node = QtWidgets.QTreeWidgetItem(parent)

    # set node text to data parsed from feature
    for (idx, text) in enumerate((feature, description, comment)):
        node.setText(idx, text)

    # we need to set our own type so we can control the GUI accordingly
    if feature.startswith(("- and:", "- or:", "- not:", "- basic block:", "- optional:")):
        setattr(node, "capa_type", CapaExplorerRulgenEditor.get_node_type_expression())
    elif feature.startswith("#"):
        setattr(node, "capa_type", CapaExplorerRulgenEditor.get_node_type_comment())
    else:
        setattr(node, "capa_type", CapaExplorerRulgenEditor.get_node_type_feature())

    # format the node based on its type
    (self.set_expression_node, self.set_feature_node, self.set_comment_node)[node.capa_type](node)

    parent.addChild(node)

    return node
|
||||
|
||||
def load_features_from_yaml(self, rule_text, update_preview=False):
|
||||
""" """
|
||||
|
||||
def add_node(parent, node):
|
||||
if node.text(0).startswith("description:"):
|
||||
if parent.childCount():
|
||||
parent.child(parent.childCount() - 1).setText(1, node.text(0).lstrip("description:").lstrip())
|
||||
else:
|
||||
parent.setText(1, node.text(0).lstrip("description:").lstrip())
|
||||
elif node.text(0).startswith("- description:"):
|
||||
parent.setText(1, node.text(0).lstrip("- description:").lstrip())
|
||||
else:
|
||||
parent.addChild(node)
|
||||
|
||||
def build(parent, nodes):
|
||||
if nodes:
|
||||
child_lvl = nodes[0].capa_level
|
||||
while nodes:
|
||||
node = nodes.pop(0)
|
||||
if node.capa_level == child_lvl:
|
||||
add_node(parent, node)
|
||||
elif node.capa_level > child_lvl:
|
||||
nodes.insert(0, node)
|
||||
build(parent.child(parent.childCount() - 1), nodes)
|
||||
else:
|
||||
parent = parent.parent() if parent.parent() else parent
|
||||
add_node(parent, node)
|
||||
|
||||
self.reset_view()
|
||||
|
||||
# check for lack of features block
|
||||
if -1 == rule_text.find("features:"):
|
||||
return
|
||||
|
||||
rule_features = rule_text[rule_text.find("features:") + len("features:") :].strip()
|
||||
rule_nodes = list(yaml_to_nodes(rule_features))
|
||||
rule_features = rule_text[rule_text.find("features:") + len("features:") :].strip("\n")
|
||||
|
||||
# check for lack of nodes
|
||||
if not rule_nodes:
|
||||
if not rule_features:
|
||||
# no features; nothing to do
|
||||
return
|
||||
|
||||
for o in rule_nodes:
|
||||
(self.set_expression_node, self.set_feature_node, self.set_comment_node)[o.capa_type](o)
|
||||
# build tree from yaml text using stack-based algorithm to build parent -> child edges
|
||||
stack = [self.invisibleRootItem()]
|
||||
for line in rule_features.splitlines():
|
||||
if not len(line.strip()):
|
||||
continue
|
||||
|
||||
self.root = rule_nodes.pop(0)
|
||||
self.addTopLevelItem(self.root)
|
||||
indent = calc_indent_from_line(line)
|
||||
|
||||
# we need to grow our stack to ensure proper parent -> child edges
|
||||
if indent > len(stack):
|
||||
stack.extend([None] * (indent - len(stack)))
|
||||
|
||||
# shave the stack; divide by 2 because even indent, add 1 to avoid shaving root node
|
||||
stack[indent // 2 + 1 :] = []
|
||||
|
||||
# find our parent; should be last node in stack not None
|
||||
parent = None
|
||||
for o in stack[::-1]:
|
||||
if o:
|
||||
parent = o
|
||||
break
|
||||
|
||||
node = self.make_child_node_from_feature(parent, parse_yaml_line(line.strip()))
|
||||
|
||||
# append our new node in case its a parent for another node
|
||||
if node:
|
||||
stack.append(node)
|
||||
|
||||
if update_preview:
|
||||
self.preview.blockSignals(True)
|
||||
self.preview.setPlainText(rule_text)
|
||||
self.preview.blockSignals(False)
|
||||
|
||||
build(self.root, rule_nodes)
|
||||
|
||||
self.expandAll()
|
||||
expand_tree(self.invisibleRootItem())
|
||||
|
||||
def get_features(self, selected=False, ignore=()):
|
||||
""" """
|
||||
@@ -736,9 +788,12 @@ class CapaExplorerRulegenFeatures(QtWidgets.QTreeWidget):
|
||||
self.editor = editor
|
||||
|
||||
self.setHeaderLabels(["Feature", "Virtual Address"])
|
||||
self.header().setSectionResizeMode(QtWidgets.QHeaderView.ResizeToContents)
|
||||
self.setStyleSheet("QTreeView::item {padding-right: 15 px;padding-bottom: 2 px;}")
|
||||
|
||||
# configure view columns to auto-resize
|
||||
for idx in range(2):
|
||||
self.header().setSectionResizeMode(idx, QtWidgets.QHeaderView.Interactive)
|
||||
|
||||
self.setExpandsOnDoubleClick(False)
|
||||
self.setContextMenuPolicy(QtCore.Qt.CustomContextMenu)
|
||||
self.setSelectionMode(QtWidgets.QAbstractItemView.ExtendedSelection)
|
||||
@@ -746,6 +801,8 @@ class CapaExplorerRulegenFeatures(QtWidgets.QTreeWidget):
|
||||
# connect slots
|
||||
self.itemDoubleClicked.connect(self.slot_item_double_clicked)
|
||||
self.customContextMenuRequested.connect(self.slot_custom_context_menu_requested)
|
||||
self.expanded.connect(self.slot_resize_columns_to_content)
|
||||
self.collapsed.connect(self.slot_resize_columns_to_content)
|
||||
|
||||
self.reset_view()
|
||||
|
||||
@@ -773,12 +830,24 @@ class CapaExplorerRulegenFeatures(QtWidgets.QTreeWidget):
|
||||
""" """
|
||||
self.clear()
|
||||
|
||||
def slot_resize_columns_to_content(self):
|
||||
""" """
|
||||
resize_columns_to_content(self.header())
|
||||
|
||||
def slot_add_selected_features(self, action):
|
||||
""" """
|
||||
selected = [item.data(0, 0x100) for item in self.selectedItems()]
|
||||
if selected:
|
||||
self.editor.update_features(selected)
|
||||
|
||||
def slot_add_n_bytes_feature(self, action):
    """Ask for a byte count and add the selected bytes feature, truncated to it, to the editor.

    Nothing happens when the prompt is cancelled, the count is outside
    1..MAX_BYTES_FEATURE_SIZE, or no item is selected.

    Args:
      action: the triggered context-menu action (unused).
    """
    import copy

    count = idaapi.ask_long(16, f"Enter number of bytes (1-{capa.features.common.MAX_BYTES_FEATURE_SIZE}):")
    if not count or not 1 <= count <= capa.features.common.MAX_BYTES_FEATURE_SIZE:
        return

    selected = self.selectedItems()
    if not selected:
        return

    # truncate a copy: the object stored under role 0x100 belongs to the tree item, and
    # shortening it in place would permanently change the feature kept in the features view
    feature = copy.copy(selected[0].data(0, 0x100))
    feature.value = feature.value[:count]
    self.editor.update_features([feature])
|
||||
|
||||
def slot_custom_context_menu_requested(self, pos):
|
||||
""" """
|
||||
actions = []
|
||||
@@ -790,6 +859,8 @@ class CapaExplorerRulegenFeatures(QtWidgets.QTreeWidget):
|
||||
|
||||
if selected_items_count == 1:
|
||||
action_add_features_fmt = "Add feature"
|
||||
if isinstance(self.selectedItems()[0].data(0, 0x100), capa.features.common.Bytes):
|
||||
actions.append(("Add n bytes...", (), self.slot_add_n_bytes_feature))
|
||||
else:
|
||||
action_add_features_fmt = "Add %d features" % selected_items_count
|
||||
|
||||
@@ -819,13 +890,58 @@ class CapaExplorerRulegenFeatures(QtWidgets.QTreeWidget):
|
||||
if data:
|
||||
to_match = data.get_value_str()
|
||||
if not to_match or text.lower() not in to_match.lower():
|
||||
o.setHidden(True)
|
||||
if not o.isHidden():
|
||||
o.setHidden(True)
|
||||
continue
|
||||
o.setHidden(False)
|
||||
o.setExpanded(True)
|
||||
if o.isHidden():
|
||||
o.setHidden(False)
|
||||
if o.childCount() and not o.isExpanded():
|
||||
o.setExpanded(True)
|
||||
else:
|
||||
self.show_all_items()
|
||||
|
||||
def filter_items_by_ea(self, min_ea, max_ea=None):
    """Show only the tree items whose address column matches the requested address(es).

    An item is shown (together with all of its ancestors, which are also
    expanded) when its address equals `min_ea`, or, if `max_ea` is given, when
    it lies within [min_ea, max_ea]. Every other item, including items with an
    empty address column, is hidden.

    Args:
      min_ea: address to match, or lower bound of the range.
      max_ea: optional inclusive upper bound of the range.
    """
    seen = []

    def reveal_with_ancestors(item):
        """walk from an item up to the top level, un-hiding and expanding along the way"""
        while item:
            seen.append(item)
            if item.isHidden():
                item.setHidden(False)
            if item.childCount() and not item.isExpanded():
                item.setExpanded(True)
            item = item.parent()

    def conceal(item):
        """hide an item unless it is already hidden"""
        if not item.isHidden():
            item.setHidden(True)

    for item in iterate_tree(self):
        if item in seen:
            # already handled while revealing a descendant
            continue

        # the address column holds the ea as hex text
        raw_ea = item.text(CapaExplorerRulegenFeatures.get_column_address_index())

        if raw_ea == "":
            # no address recorded for this item; hidden unless a descendant reveals it later
            conceal(item)
            continue

        item_ea = int(raw_ea, 16)
        within_range = max_ea is not None and min_ea <= item_ea <= max_ea

        if within_range or item_ea == min_ea:
            reveal_with_ancestors(item)
        else:
            conceal(item)

    # column widths depend on which rows are visible
    resize_columns_to_content(self.header())
|
||||
|
||||
def style_parent_node(self, o):
|
||||
""" """
|
||||
font = QtGui.QFont()
|
||||
@@ -887,6 +1003,7 @@ class CapaExplorerRulegenFeatures(QtWidgets.QTreeWidget):
|
||||
self.parse_features_for_tree(self.new_parent_node(self, ("File Scope",)), file_features)
|
||||
if func_features:
|
||||
self.parse_features_for_tree(self.new_parent_node(self, ("Function/Basic Block Scope",)), func_features)
|
||||
resize_columns_to_content(self.header())
|
||||
|
||||
def parse_features_for_tree(self, parent, features):
|
||||
""" """
|
||||
@@ -931,7 +1048,11 @@ class CapaExplorerRulegenFeatures(QtWidgets.QTreeWidget):
|
||||
self.parent_items[feature], (format_feature(feature), format_address(ea)), feature=feature
|
||||
)
|
||||
else:
|
||||
ea = eas.pop()
|
||||
if eas:
|
||||
ea = eas.pop()
|
||||
else:
|
||||
# some features may not have an address e.g. "format"
|
||||
ea = ""
|
||||
for (i, v) in enumerate((format_feature(feature), format_address(ea))):
|
||||
self.parent_items[feature].setText(i, v)
|
||||
self.parent_items[feature].setData(0, 0x100, feature)
|
||||
@@ -1000,11 +1121,7 @@ class CapaExplorerQtreeView(QtWidgets.QTreeView):
|
||||
def slot_resize_columns_to_content(self):
|
||||
"""reset view columns to contents"""
|
||||
if self.should_resize_columns:
|
||||
self.header().resizeSections(QtWidgets.QHeaderView.ResizeToContents)
|
||||
|
||||
# limit size of first section
|
||||
if self.header().sectionSize(0) > MAX_SECTION_SIZE:
|
||||
self.header().resizeSection(0, MAX_SECTION_SIZE)
|
||||
resize_columns_to_content(self.header())
|
||||
|
||||
def map_index_to_source_item(self, model_index):
|
||||
"""map proxy model index to source model item
|
||||
|
||||
279
capa/main.py
279
capa/main.py
@@ -10,8 +10,6 @@ See the License for the specific language governing permissions and limitations
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import gzip
|
||||
import time
|
||||
import hashlib
|
||||
import logging
|
||||
import os.path
|
||||
@@ -19,13 +17,14 @@ import argparse
|
||||
import datetime
|
||||
import textwrap
|
||||
import itertools
|
||||
import contextlib
|
||||
import collections
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
import halo
|
||||
import tqdm
|
||||
import colorama
|
||||
from pefile import PEFormatError
|
||||
from elftools.common.exceptions import ELFError
|
||||
|
||||
import capa.rules
|
||||
import capa.engine
|
||||
@@ -37,7 +36,9 @@ import capa.features.common
|
||||
import capa.features.freeze
|
||||
import capa.render.vverbose
|
||||
import capa.features.extractors
|
||||
import capa.features.extractors.common
|
||||
import capa.features.extractors.pefile
|
||||
import capa.features.extractors.elffile
|
||||
from capa.rules import Rule, RuleSet
|
||||
from capa.engine import FeatureSet, MatchResults
|
||||
from capa.helpers import get_file_taste
|
||||
@@ -45,7 +46,6 @@ from capa.features.extractors.base_extractor import FunctionHandle, FeatureExtra
|
||||
|
||||
RULES_PATH_DEFAULT_STRING = "(embedded rules)"
|
||||
SIGNATURES_PATH_DEFAULT_STRING = "(embedded signatures)"
|
||||
SUPPORTED_FILE_MAGIC = set([b"MZ"])
|
||||
BACKEND_VIV = "vivisect"
|
||||
BACKEND_SMDA = "smda"
|
||||
EXTENSIONS_SHELLCODE_32 = ("sc32", "raw32")
|
||||
@@ -55,14 +55,6 @@ EXTENSIONS_SHELLCODE_64 = ("sc64", "raw64")
|
||||
logger = logging.getLogger("capa")
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def timing(msg: str):
|
||||
t0 = time.time()
|
||||
yield
|
||||
t1 = time.time()
|
||||
logger.debug("perf: %s: %0.2fs", msg, t1 - t0)
|
||||
|
||||
|
||||
def set_vivisect_log_level(level):
|
||||
logging.getLogger("vivisect").setLevel(level)
|
||||
logging.getLogger("vivisect.base").setLevel(level)
|
||||
@@ -79,7 +71,7 @@ def find_function_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, f:
|
||||
function_features = collections.defaultdict(set) # type: FeatureSet
|
||||
bb_matches = collections.defaultdict(list) # type: MatchResults
|
||||
|
||||
for feature, va in extractor.extract_function_features(f):
|
||||
for feature, va in itertools.chain(extractor.extract_function_features(f), extractor.extract_global_features()):
|
||||
function_features[feature].add(va)
|
||||
|
||||
for bb in extractor.get_basic_blocks(f):
|
||||
@@ -88,12 +80,16 @@ def find_function_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, f:
|
||||
# - basic blocks
|
||||
bb_features = collections.defaultdict(set)
|
||||
|
||||
for feature, va in extractor.extract_basic_block_features(f, bb):
|
||||
for feature, va in itertools.chain(
|
||||
extractor.extract_basic_block_features(f, bb), extractor.extract_global_features()
|
||||
):
|
||||
bb_features[feature].add(va)
|
||||
function_features[feature].add(va)
|
||||
|
||||
for insn in extractor.get_instructions(f, bb):
|
||||
for feature, va in extractor.extract_insn_features(f, bb, insn):
|
||||
for feature, va in itertools.chain(
|
||||
extractor.extract_insn_features(f, bb, insn), extractor.extract_global_features()
|
||||
):
|
||||
bb_features[feature].add(va)
|
||||
function_features[feature].add(va)
|
||||
|
||||
@@ -101,8 +97,9 @@ def find_function_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, f:
|
||||
|
||||
for rule_name, res in matches.items():
|
||||
bb_matches[rule_name].extend(res)
|
||||
rule = ruleset[rule_name]
|
||||
for va, _ in res:
|
||||
function_features[capa.features.common.MatchedRule(rule_name)].add(va)
|
||||
capa.engine.index_rule_matches(function_features, rule, [va])
|
||||
|
||||
_, function_matches = capa.engine.match(ruleset.function_rules, function_features, int(f))
|
||||
return function_matches, bb_matches, len(function_features)
|
||||
@@ -111,7 +108,7 @@ def find_function_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, f:
|
||||
def find_file_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, function_features: FeatureSet):
|
||||
file_features = collections.defaultdict(set) # type: FeatureSet
|
||||
|
||||
for feature, va in extractor.extract_file_features():
|
||||
for feature, va in itertools.chain(extractor.extract_file_features(), extractor.extract_global_features()):
|
||||
# not all file features may have virtual addresses.
|
||||
# if not, then at least ensure the feature shows up in the index.
|
||||
# the set of addresses will still be empty.
|
||||
@@ -175,10 +172,11 @@ def find_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, disable_pro
|
||||
|
||||
# collection of features that captures the rule matches within function and BB scopes.
|
||||
# mapping from feature (matched rule) to set of addresses at which it matched.
|
||||
function_and_lower_features = {
|
||||
capa.features.common.MatchedRule(rule_name): set(map(lambda p: p[0], results))
|
||||
for rule_name, results in itertools.chain(all_function_matches.items(), all_bb_matches.items())
|
||||
} # type: FeatureSet
|
||||
function_and_lower_features: FeatureSet = collections.defaultdict(set)
|
||||
for rule_name, results in itertools.chain(all_function_matches.items(), all_bb_matches.items()):
|
||||
locations = set(map(lambda p: p[0], results))
|
||||
rule = ruleset[rule_name]
|
||||
capa.engine.index_rule_matches(function_and_lower_features, rule, locations)
|
||||
|
||||
all_file_matches, feature_count = find_file_capabilities(ruleset, extractor, function_and_lower_features)
|
||||
meta["feature_counts"]["file"] = feature_count
|
||||
@@ -235,50 +233,61 @@ def has_file_limitation(rules: RuleSet, capabilities: MatchResults, is_standalon
|
||||
return False
|
||||
|
||||
|
||||
def is_supported_file_type(sample: str) -> bool:
|
||||
def is_supported_format(sample: str) -> bool:
|
||||
"""
|
||||
Return if this is a supported file based on magic header values
|
||||
"""
|
||||
with open(sample, "rb") as f:
|
||||
magic = f.read(2)
|
||||
if magic in SUPPORTED_FILE_MAGIC:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
taste = f.read(0x100)
|
||||
|
||||
return len(list(capa.features.extractors.common.extract_format(taste))) == 1
|
||||
|
||||
|
||||
SHELLCODE_BASE = 0x690000
|
||||
|
||||
|
||||
def get_shellcode_vw(sample, arch="auto"):
|
||||
"""
|
||||
Return shellcode workspace using explicit arch or via auto detect.
|
||||
The workspace is *not* analyzed nor saved. Its up to the caller to do this.
|
||||
Then, they can register FLIRT analyzers or decide not to write to disk.
|
||||
"""
|
||||
import viv_utils
|
||||
|
||||
def get_format(sample: str) -> str:
|
||||
with open(sample, "rb") as f:
|
||||
sample_bytes = f.read()
|
||||
buf = f.read()
|
||||
|
||||
if arch == "auto":
|
||||
# choose arch with most functions, idea by Jay G.
|
||||
vw_cands = []
|
||||
for arch in ["i386", "amd64"]:
|
||||
vw_cands.append(
|
||||
viv_utils.getShellcodeWorkspace(
|
||||
sample_bytes, arch, base=SHELLCODE_BASE, analyze=False, should_save=False
|
||||
)
|
||||
)
|
||||
if not vw_cands:
|
||||
raise ValueError("could not generate vivisect workspace")
|
||||
vw = max(vw_cands, key=lambda vw: len(vw.getFunctions()))
|
||||
else:
|
||||
vw = viv_utils.getShellcodeWorkspace(sample_bytes, arch, base=SHELLCODE_BASE, analyze=False, should_save=False)
|
||||
for feature, _ in capa.features.extractors.common.extract_format(buf):
|
||||
assert isinstance(feature.value, str)
|
||||
return feature.value
|
||||
|
||||
vw.setMeta("StorageName", "%s.viv" % sample)
|
||||
return "unknown"
|
||||
|
||||
return vw
|
||||
|
||||
def is_supported_arch(sample: str) -> bool:
|
||||
with open(sample, "rb") as f:
|
||||
buf = f.read()
|
||||
|
||||
return len(list(capa.features.extractors.common.extract_arch(buf))) == 1
|
||||
|
||||
|
||||
def get_arch(sample: str) -> str:
|
||||
with open(sample, "rb") as f:
|
||||
buf = f.read()
|
||||
|
||||
for feature, _ in capa.features.extractors.common.extract_arch(buf):
|
||||
assert isinstance(feature.value, str)
|
||||
return feature.value
|
||||
|
||||
return "unknown"
|
||||
|
||||
|
||||
def is_supported_os(sample: str) -> bool:
|
||||
with open(sample, "rb") as f:
|
||||
buf = f.read()
|
||||
|
||||
return len(list(capa.features.extractors.common.extract_os(buf))) == 1
|
||||
|
||||
|
||||
def get_os(sample: str) -> str:
|
||||
with open(sample, "rb") as f:
|
||||
buf = f.read()
|
||||
|
||||
for feature, _ in capa.features.extractors.common.extract_os(buf):
|
||||
assert isinstance(feature.value, str)
|
||||
return feature.value
|
||||
|
||||
return "unknown"
|
||||
|
||||
|
||||
def get_meta_str(vw):
|
||||
@@ -292,58 +301,6 @@ def get_meta_str(vw):
|
||||
return "%s, number of functions: %d" % (", ".join(meta), len(vw.getFunctions()))
|
||||
|
||||
|
||||
def load_flirt_signature(path):
|
||||
# lazy import enables us to only require flirt here and not in IDA, for example
|
||||
import flirt
|
||||
|
||||
if path.endswith(".sig"):
|
||||
with open(path, "rb") as f:
|
||||
with timing("flirt: parsing .sig: " + path):
|
||||
sigs = flirt.parse_sig(f.read())
|
||||
|
||||
elif path.endswith(".pat"):
|
||||
with open(path, "rb") as f:
|
||||
with timing("flirt: parsing .pat: " + path):
|
||||
sigs = flirt.parse_pat(f.read().decode("utf-8").replace("\r\n", "\n"))
|
||||
|
||||
elif path.endswith(".pat.gz"):
|
||||
with gzip.open(path, "rb") as f:
|
||||
with timing("flirt: parsing .pat.gz: " + path):
|
||||
sigs = flirt.parse_pat(f.read().decode("utf-8").replace("\r\n", "\n"))
|
||||
|
||||
else:
|
||||
raise ValueError("unexpect signature file extension: " + path)
|
||||
|
||||
return sigs
|
||||
|
||||
|
||||
def register_flirt_signature_analyzers(vw, sigpaths):
|
||||
"""
|
||||
args:
|
||||
vw (vivisect.VivWorkspace):
|
||||
sigpaths (List[str]): file system paths of .sig/.pat files
|
||||
"""
|
||||
# lazy import enables us to only require flirt here and not in IDA, for example
|
||||
import flirt
|
||||
import viv_utils.flirt
|
||||
|
||||
for sigpath in sigpaths:
|
||||
try:
|
||||
sigs = load_flirt_signature(sigpath)
|
||||
except ValueError as e:
|
||||
logger.warning("could not load %s: %s", sigpath, str(e))
|
||||
continue
|
||||
|
||||
logger.debug("flirt: sig count: %d", len(sigs))
|
||||
|
||||
with timing("flirt: compiling sigs"):
|
||||
matcher = flirt.compile(sigs)
|
||||
|
||||
analyzer = viv_utils.flirt.FlirtFunctionAnalyzer(matcher, sigpath)
|
||||
logger.debug("registering viv function analyzer: %s", repr(analyzer))
|
||||
viv_utils.flirt.addFlirtFunctionAnalyzer(vw, analyzer)
|
||||
|
||||
|
||||
def is_running_standalone() -> bool:
|
||||
"""
|
||||
are we running from a PyInstaller'd executable?
|
||||
@@ -389,6 +346,14 @@ class UnsupportedFormatError(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
class UnsupportedArchError(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
class UnsupportedOSError(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
def get_workspace(path, format, sigpaths):
|
||||
"""
|
||||
load the program at the given path into a vivisect workspace using the given format.
|
||||
@@ -396,8 +361,9 @@ def get_workspace(path, format, sigpaths):
|
||||
|
||||
supported formats:
|
||||
- pe
|
||||
- sc32
|
||||
- sc64
|
||||
- elf
|
||||
- shellcode 32-bit
|
||||
- shellcode 64-bit
|
||||
- auto
|
||||
|
||||
this creates and analyzes the workspace; however, it does *not* save the workspace.
|
||||
@@ -409,22 +375,22 @@ def get_workspace(path, format, sigpaths):
|
||||
|
||||
logger.debug("generating vivisect workspace for: %s", path)
|
||||
if format == "auto":
|
||||
if not is_supported_file_type(path):
|
||||
if not is_supported_format(path):
|
||||
raise UnsupportedFormatError()
|
||||
|
||||
# don't analyze, so that we can add our Flirt function analyzer first.
|
||||
vw = viv_utils.getWorkspace(path, analyze=False, should_save=False)
|
||||
elif format == "pe":
|
||||
elif format in {"pe", "elf"}:
|
||||
vw = viv_utils.getWorkspace(path, analyze=False, should_save=False)
|
||||
elif format == "sc32":
|
||||
# these are not analyzed nor saved.
|
||||
vw = get_shellcode_vw(path, arch="i386")
|
||||
vw = viv_utils.getShellcodeWorkspaceFromFile(path, arch="i386", analyze=False)
|
||||
elif format == "sc64":
|
||||
vw = get_shellcode_vw(path, arch="amd64")
|
||||
vw = viv_utils.getShellcodeWorkspaceFromFile(path, arch="amd64", analyze=False)
|
||||
else:
|
||||
raise ValueError("unexpected format: " + format)
|
||||
|
||||
register_flirt_signature_analyzers(vw, sigpaths)
|
||||
viv_utils.flirt.register_flirt_signature_analyzers(vw, sigpaths)
|
||||
|
||||
vw.analyze()
|
||||
|
||||
@@ -441,8 +407,20 @@ def get_extractor(
|
||||
) -> FeatureExtractor:
|
||||
"""
|
||||
raises:
|
||||
UnsupportedFormatError:
|
||||
UnsupportedFormatError
|
||||
UnsupportedArchError
|
||||
UnsupportedOSError
|
||||
"""
|
||||
if format not in ("sc32", "sc64"):
|
||||
if not is_supported_format(path):
|
||||
raise UnsupportedFormatError()
|
||||
|
||||
if not is_supported_arch(path):
|
||||
raise UnsupportedArchError()
|
||||
|
||||
if not is_supported_os(path):
|
||||
raise UnsupportedOSError()
|
||||
|
||||
if backend == "smda":
|
||||
from smda.SmdaConfig import SmdaConfig
|
||||
from smda.Disassembler import Disassembler
|
||||
@@ -461,10 +439,6 @@ def get_extractor(
|
||||
import capa.features.extractors.viv.extractor
|
||||
|
||||
with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
|
||||
if format == "auto" and path.endswith(EXTENSIONS_SHELLCODE_32):
|
||||
format = "sc32"
|
||||
elif format == "auto" and path.endswith(EXTENSIONS_SHELLCODE_64):
|
||||
format = "sc64"
|
||||
vw = get_workspace(path, format, sigpaths)
|
||||
|
||||
if should_save_workspace:
|
||||
@@ -572,7 +546,7 @@ def get_signatures(sigs_path):
|
||||
return paths
|
||||
|
||||
|
||||
def collect_metadata(argv, sample_path, rules_path, format, extractor):
|
||||
def collect_metadata(argv, sample_path, rules_path, extractor):
|
||||
md5 = hashlib.md5()
|
||||
sha1 = hashlib.sha1()
|
||||
sha256 = hashlib.sha256()
|
||||
@@ -587,6 +561,10 @@ def collect_metadata(argv, sample_path, rules_path, format, extractor):
|
||||
if rules_path != RULES_PATH_DEFAULT_STRING:
|
||||
rules_path = os.path.abspath(os.path.normpath(rules_path))
|
||||
|
||||
format = get_format(sample_path)
|
||||
arch = get_arch(sample_path)
|
||||
os_ = get_os(sample_path)
|
||||
|
||||
return {
|
||||
"timestamp": datetime.datetime.now().isoformat(),
|
||||
"version": capa.version.__version__,
|
||||
@@ -599,6 +577,8 @@ def collect_metadata(argv, sample_path, rules_path, format, extractor):
|
||||
},
|
||||
"analysis": {
|
||||
"format": format,
|
||||
"arch": arch,
|
||||
"os": os_,
|
||||
"extractor": extractor.__class__.__name__,
|
||||
"rules": rules_path,
|
||||
"base_address": extractor.get_base_address(),
|
||||
@@ -668,6 +648,7 @@ def install_common_args(parser, wanted=None):
|
||||
formats = [
|
||||
("auto", "(default) detect file type automatically"),
|
||||
("pe", "Windows PE file"),
|
||||
("elf", "Executable and Linkable Format"),
|
||||
("sc32", "32-bit shellcode"),
|
||||
("sc64", "64-bit shellcode"),
|
||||
("freeze", "features previously frozen by capa"),
|
||||
@@ -871,19 +852,34 @@ def main(argv=None):
|
||||
logger.error("%s", str(e))
|
||||
return -1
|
||||
|
||||
file_extractor = None
|
||||
if args.format == "pe" or (args.format == "auto" and taste.startswith(b"MZ")):
|
||||
# this pefile file feature extractor is pretty light weight: it doesn't do any code analysis.
|
||||
# so we can fairly quickly determine if the given PE file has "pure" file-scope rules
|
||||
# these pefile and elffile file feature extractors are pretty light weight: they don't do any code analysis.
|
||||
# so we can fairly quickly determine if the given file has "pure" file-scope rules
|
||||
# that indicate a limitation (like "file is packed based on section names")
|
||||
# and avoid doing a full code analysis on difficult/impossible binaries.
|
||||
try:
|
||||
from pefile import PEFormatError
|
||||
|
||||
file_extractor = capa.features.extractors.pefile.PefileFeatureExtractor(args.sample)
|
||||
except PEFormatError as e:
|
||||
logger.error("Input file '%s' is not a valid PE file: %s", args.sample, str(e))
|
||||
return -1
|
||||
pure_file_capabilities, _ = find_file_capabilities(rules, file_extractor, {})
|
||||
|
||||
elif args.format == "elf" or (args.format == "auto" and taste.startswith(b"\x7fELF")):
|
||||
try:
|
||||
file_extractor = capa.features.extractors.elffile.ElfFeatureExtractor(args.sample)
|
||||
except (ELFError, OverflowError) as e:
|
||||
logger.error("Input file '%s' is not a valid ELF file: %s", args.sample, str(e))
|
||||
return -1
|
||||
|
||||
if file_extractor:
|
||||
try:
|
||||
pure_file_capabilities, _ = find_file_capabilities(rules, file_extractor, {})
|
||||
except PEFormatError as e:
|
||||
logger.error("Input file '%s' is not a valid PE file: %s", args.sample, str(e))
|
||||
return -1
|
||||
except (ELFError, OverflowError) as e:
|
||||
logger.error("Input file '%s' is not a valid ELF file: %s", args.sample, str(e))
|
||||
return -1
|
||||
|
||||
# file limitations that rely on non-file scope won't be detected here.
|
||||
# nor on FunctionName features, because pefile doesn't support this.
|
||||
@@ -895,7 +891,11 @@ def main(argv=None):
|
||||
return -1
|
||||
|
||||
try:
|
||||
sig_paths = get_signatures(args.signatures)
|
||||
if args.format == "pe" or (args.format == "auto" and taste.startswith(b"MZ")):
|
||||
sig_paths = get_signatures(args.signatures)
|
||||
else:
|
||||
sig_paths = []
|
||||
logger.debug("skipping library code matching: only have PE signatures")
|
||||
except (IOError) as e:
|
||||
logger.error("%s", str(e))
|
||||
return -1
|
||||
@@ -906,6 +906,11 @@ def main(argv=None):
|
||||
extractor = capa.features.freeze.load(f.read())
|
||||
else:
|
||||
format = args.format
|
||||
if format == "auto" and args.sample.endswith(EXTENSIONS_SHELLCODE_32):
|
||||
format = "sc32"
|
||||
elif format == "auto" and args.sample.endswith(EXTENSIONS_SHELLCODE_64):
|
||||
format = "sc64"
|
||||
|
||||
should_save_workspace = os.environ.get("CAPA_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
|
||||
|
||||
try:
|
||||
@@ -914,16 +919,32 @@ def main(argv=None):
|
||||
)
|
||||
except UnsupportedFormatError:
|
||||
logger.error("-" * 80)
|
||||
logger.error(" Input file does not appear to be a PE file.")
|
||||
logger.error(" Input file does not appear to be a PE or ELF file.")
|
||||
logger.error(" ")
|
||||
logger.error(
|
||||
" capa currently only supports analyzing PE files (or shellcode, when using --format sc32|sc64)."
|
||||
" capa currently only supports analyzing PE and ELF files (or shellcode, when using --format sc32|sc64)."
|
||||
)
|
||||
logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.")
|
||||
logger.error("-" * 80)
|
||||
return -1
|
||||
except UnsupportedArchError:
|
||||
logger.error("-" * 80)
|
||||
logger.error(" Input file does not appear to target a supported architecture.")
|
||||
logger.error(" ")
|
||||
logger.error(" capa currently only supports analyzing x86 (32- and 64-bit).")
|
||||
logger.error("-" * 80)
|
||||
return -1
|
||||
except UnsupportedOSError:
|
||||
logger.error("-" * 80)
|
||||
logger.error(" Input file does not appear to target a supported OS.")
|
||||
logger.error(" ")
|
||||
logger.error(
|
||||
" capa currently only supports analyzing executables for some operating systems (including Windows and Linux)."
|
||||
)
|
||||
logger.error("-" * 80)
|
||||
return -1
|
||||
|
||||
meta = collect_metadata(argv, args.sample, args.rules, format, extractor)
|
||||
meta = collect_metadata(argv, args.sample, args.rules, extractor)
|
||||
|
||||
capabilities, counts = find_capabilities(rules, extractor, disable_progress=args.quiet)
|
||||
meta["analysis"].update(counts)
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
import collections
|
||||
from typing import Dict, List
|
||||
|
||||
import tabulate
|
||||
|
||||
@@ -33,6 +32,9 @@ def render_meta(doc, ostream: StringIO):
|
||||
(width("md5", 22), width(doc["meta"]["sample"]["md5"], 82)),
|
||||
("sha1", doc["meta"]["sample"]["sha1"]),
|
||||
("sha256", doc["meta"]["sample"]["sha256"]),
|
||||
("os", doc["meta"]["analysis"]["os"]),
|
||||
("format", doc["meta"]["analysis"]["format"]),
|
||||
("arch", doc["meta"]["analysis"]["arch"]),
|
||||
("path", doc["meta"]["sample"]["path"]),
|
||||
]
|
||||
|
||||
|
||||
@@ -74,7 +74,7 @@ def convert_feature_to_result_document(feature):
|
||||
result = {"type": feature.name, feature.name: feature.get_value_str()}
|
||||
if feature.description:
|
||||
result["description"] = feature.description
|
||||
if feature.name == "regex":
|
||||
if feature.name in ("regex", "substring"):
|
||||
result["matches"] = feature.matches
|
||||
return result
|
||||
|
||||
@@ -198,9 +198,19 @@ def convert_match_to_result_document(rules, capabilities, result):
|
||||
# in the meantime, the above might be sufficient.
|
||||
rule_matches = {address: result for (address, result) in capabilities[rule.name]}
|
||||
for location in doc["locations"]:
|
||||
doc["children"].append(
|
||||
convert_match_to_result_document(rules, capabilities, rule_matches[location])
|
||||
)
|
||||
# doc[locations] contains all matches for the given namespace.
|
||||
# for example, the feature might be `match: anti-analysis/packer`
|
||||
# which matches against "generic unpacker" and "UPX".
|
||||
# in this case, doc[locations] contains locations for *both* of thse.
|
||||
#
|
||||
# rule_matches contains the matches for the specific rule.
|
||||
# this is a subset of doc[locations].
|
||||
#
|
||||
# so, grab only the locations for current rule.
|
||||
if location in rule_matches:
|
||||
doc["children"].append(
|
||||
convert_match_to_result_document(rules, capabilities, rule_matches[location])
|
||||
)
|
||||
|
||||
return doc
|
||||
|
||||
|
||||
@@ -41,7 +41,9 @@ def render_meta(ostream, doc):
|
||||
path /tmp/suspicious.dll_
|
||||
timestamp 2020-07-03T10:17:05.796933
|
||||
capa version 0.0.0
|
||||
format auto
|
||||
os windows
|
||||
format pe
|
||||
arch amd64
|
||||
extractor VivisectFeatureExtractor
|
||||
base address 0x10000000
|
||||
rules (embedded rules)
|
||||
@@ -55,7 +57,9 @@ def render_meta(ostream, doc):
|
||||
("path", doc["meta"]["sample"]["path"]),
|
||||
("timestamp", doc["meta"]["timestamp"]),
|
||||
("capa version", doc["meta"]["version"]),
|
||||
("os", doc["meta"]["analysis"]["os"]),
|
||||
("format", doc["meta"]["analysis"]["format"]),
|
||||
("arch", doc["meta"]["analysis"]["arch"]),
|
||||
("extractor", doc["meta"]["analysis"]["extractor"]),
|
||||
("base address", hex(doc["meta"]["analysis"]["base_address"])),
|
||||
("rules", doc["meta"]["analysis"]["rules"]),
|
||||
|
||||
@@ -97,7 +97,7 @@ def render_feature(ostream, match, feature, indent=0):
|
||||
key = feature["type"]
|
||||
value = feature[feature["type"]]
|
||||
|
||||
if key != "regex":
|
||||
if key not in ("regex", "substring"):
|
||||
# like:
|
||||
# number: 10 = SOME_CONSTANT @ 0x401000
|
||||
if key == "string":
|
||||
@@ -113,7 +113,8 @@ def render_feature(ostream, match, feature, indent=0):
|
||||
ostream.write(capa.rules.DESCRIPTION_SEPARATOR)
|
||||
ostream.write(feature["description"])
|
||||
|
||||
render_locations(ostream, match)
|
||||
if key not in ("os", "arch"):
|
||||
render_locations(ostream, match)
|
||||
ostream.write("\n")
|
||||
else:
|
||||
# like:
|
||||
|
||||
@@ -22,7 +22,7 @@ except ImportError:
|
||||
# https://github.com/python/mypy/issues/1153
|
||||
from backports.functools_lru_cache import lru_cache # type: ignore
|
||||
|
||||
from typing import Any, Set, Dict, List, Union, Iterator
|
||||
from typing import Any, Dict, List, Union, Iterator
|
||||
|
||||
import yaml
|
||||
import ruamel.yaml
|
||||
@@ -78,6 +78,9 @@ SUPPORTED_FEATURES = {
|
||||
capa.features.file.FunctionName,
|
||||
capa.features.common.Characteristic("embedded pe"),
|
||||
capa.features.common.String,
|
||||
capa.features.common.Format,
|
||||
capa.features.common.OS,
|
||||
capa.features.common.Arch,
|
||||
},
|
||||
FUNCTION_SCOPE: {
|
||||
# plus basic block scope features, see below
|
||||
@@ -86,6 +89,8 @@ SUPPORTED_FEATURES = {
|
||||
capa.features.common.Characteristic("calls to"),
|
||||
capa.features.common.Characteristic("loop"),
|
||||
capa.features.common.Characteristic("recursive call"),
|
||||
capa.features.common.OS,
|
||||
capa.features.common.Arch,
|
||||
},
|
||||
BASIC_BLOCK_SCOPE: {
|
||||
capa.features.common.MatchedRule,
|
||||
@@ -103,6 +108,8 @@ SUPPORTED_FEATURES = {
|
||||
capa.features.common.Characteristic("tight loop"),
|
||||
capa.features.common.Characteristic("stack string"),
|
||||
capa.features.common.Characteristic("indirect call"),
|
||||
capa.features.common.OS,
|
||||
capa.features.common.Arch,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -153,14 +160,14 @@ def ensure_feature_valid_for_scope(scope: str, feature: Union[Feature, Statement
|
||||
and isinstance(feature.value, str)
|
||||
and capa.features.common.Characteristic(feature.value) not in SUPPORTED_FEATURES[scope]
|
||||
):
|
||||
raise InvalidRule("feature %s not support for scope %s" % (feature, scope))
|
||||
raise InvalidRule("feature %s not supported for scope %s" % (feature, scope))
|
||||
|
||||
if not isinstance(feature, capa.features.common.Characteristic):
|
||||
# features of this scope that are not Characteristics will be Type instances.
|
||||
# check that the given feature is one of these types.
|
||||
types_for_scope = filter(lambda t: isinstance(t, type), SUPPORTED_FEATURES[scope])
|
||||
if not isinstance(feature, tuple(types_for_scope)): # type: ignore
|
||||
raise InvalidRule("feature %s not support for scope %s" % (feature, scope))
|
||||
raise InvalidRule("feature %s not supported for scope %s" % (feature, scope))
|
||||
|
||||
|
||||
def parse_int(s: str) -> int:
|
||||
@@ -212,24 +219,26 @@ def parse_feature(key: str):
|
||||
return capa.features.insn.API
|
||||
elif key == "string":
|
||||
return capa.features.common.StringFactory
|
||||
elif key == "substring":
|
||||
return capa.features.common.Substring
|
||||
elif key == "bytes":
|
||||
return capa.features.common.Bytes
|
||||
elif key == "number":
|
||||
return capa.features.insn.Number
|
||||
elif key.startswith("number/"):
|
||||
arch = key.partition("/")[2]
|
||||
bitness = key.partition("/")[2]
|
||||
# the other handlers here return constructors for features,
|
||||
# and we want to as well,
|
||||
# however, we need to preconfigure one of the arguments (`arch`).
|
||||
# however, we need to preconfigure one of the arguments (`bitness`).
|
||||
# so, instead we return a partially-applied function that
|
||||
# provides `arch` to the feature constructor.
|
||||
# provides `bitness` to the feature constructor.
|
||||
# it forwards any other arguments provided to the closure along to the constructor.
|
||||
return functools.partial(capa.features.insn.Number, arch=arch)
|
||||
return functools.partial(capa.features.insn.Number, bitness=bitness)
|
||||
elif key == "offset":
|
||||
return capa.features.insn.Offset
|
||||
elif key.startswith("offset/"):
|
||||
arch = key.partition("/")[2]
|
||||
return functools.partial(capa.features.insn.Offset, arch=arch)
|
||||
bitness = key.partition("/")[2]
|
||||
return functools.partial(capa.features.insn.Offset, bitness=bitness)
|
||||
elif key == "mnemonic":
|
||||
return capa.features.insn.Mnemonic
|
||||
elif key == "basic blocks":
|
||||
@@ -246,6 +255,13 @@ def parse_feature(key: str):
|
||||
return capa.features.common.MatchedRule
|
||||
elif key == "function-name":
|
||||
return capa.features.file.FunctionName
|
||||
elif key == "os":
|
||||
return capa.features.common.OS
|
||||
elif key == "format":
|
||||
return capa.features.common.Format
|
||||
elif key == "arch":
|
||||
|
||||
return capa.features.common.Arch
|
||||
else:
|
||||
raise InvalidRule("unexpected statement: %s" % key)
|
||||
|
||||
@@ -456,6 +472,12 @@ def build_statements(d, scope: str):
|
||||
raise InvalidRule("unexpected range: %s" % (count))
|
||||
elif key == "string" and not isinstance(d[key], str):
|
||||
raise InvalidRule("ambiguous string value %s, must be defined as explicit string" % d[key])
|
||||
elif (
|
||||
(key == "os" and d[key] not in capa.features.common.VALID_OS)
|
||||
or (key == "format" and d[key] not in capa.features.common.VALID_FORMAT)
|
||||
or (key == "arch" and d[key] not in capa.features.common.VALID_ARCH)
|
||||
):
|
||||
raise InvalidRule("unexpected %s value %s" % (key, d[key]))
|
||||
else:
|
||||
Feature = parse_feature(key)
|
||||
value, description = parse_description(d[key], key, d.get("description"))
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = "2.0.0"
|
||||
__version__ = "3.0.2"
|
||||
|
||||
@@ -27,6 +27,10 @@ To install capa as a Python library use `pip` to fetch the `flare-capa` module.
|
||||
#### *Note*:
|
||||
This method is appropriate for integrating capa in an existing project.
|
||||
This technique doesn't pull the default rule set, so you should check it out separately from [capa-rules](https://github.com/fireeye/capa-rules/) and pass the directory to the entrypoint using `-r` or set the rules path in the IDA Pro plugin.
|
||||
This technique also doesn't set up the default library identification [signatures](https://github.com/fireeye/capa/tree/master/sigs). You can pass the signature directory using the `-s` argument.
|
||||
For example, to run capa with both a rule path and a signature path:
|
||||
|
||||
capa -r /path/to/capa-rules -s /path/to/capa-sigs suspicious.exe
|
||||
Alternatively, see Method 3 below.
|
||||
|
||||
### 1. Install capa module
|
||||
|
||||
2
rules
2
rules
Submodule rules updated: 00490c1f68...f04491001d
@@ -126,7 +126,7 @@ def get_capa_results(args):
|
||||
"error": "unexpected error: %s" % (e),
|
||||
}
|
||||
|
||||
meta = capa.main.collect_metadata("", path, "", format, extractor)
|
||||
meta = capa.main.collect_metadata("", path, "", extractor)
|
||||
capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True)
|
||||
meta["analysis"].update(counts)
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ import capa.rules
|
||||
import capa.engine
|
||||
import capa.features
|
||||
import capa.features.insn
|
||||
from capa.features.common import ARCH_X32, ARCH_X64, String
|
||||
from capa.features.common import BITNESS_X32, BITNESS_X64, String
|
||||
|
||||
logger = logging.getLogger("capa2yara")
|
||||
|
||||
|
||||
@@ -104,28 +104,16 @@ def render_attack(doc, ostream):
|
||||
for rule in rutils.capability_rules(doc):
|
||||
if not rule["meta"].get("att&ck"):
|
||||
continue
|
||||
|
||||
for attack in rule["meta"]["att&ck"]:
|
||||
tactic, _, rest = attack.partition("::")
|
||||
if "::" in rest:
|
||||
technique, _, rest = rest.partition("::")
|
||||
subtechnique, _, id = rest.rpartition(" ")
|
||||
tactics[tactic].add((technique, subtechnique, id))
|
||||
else:
|
||||
technique, _, id = rest.rpartition(" ")
|
||||
tactics[tactic].add((technique, id))
|
||||
tactics[attack["tactic"]].add((attack["technique"], attack.get("subtechnique"), attack["id"]))
|
||||
|
||||
for tactic, techniques in sorted(tactics.items()):
|
||||
inner_rows = []
|
||||
for spec in sorted(techniques):
|
||||
if len(spec) == 2:
|
||||
technique, id = spec
|
||||
for (technique, subtechnique, id) in sorted(techniques):
|
||||
if subtechnique is None:
|
||||
inner_rows.append("%s %s" % (technique, id))
|
||||
elif len(spec) == 3:
|
||||
technique, subtechnique, id = spec
|
||||
inner_rows.append("%s::%s %s" % (technique, subtechnique, id))
|
||||
else:
|
||||
raise RuntimeError("unexpected ATT&CK spec format")
|
||||
inner_rows.append("%s::%s %s" % (technique, subtechnique, id))
|
||||
ostream["ATTCK"].setdefault(tactic.upper(), inner_rows)
|
||||
|
||||
|
||||
@@ -150,31 +138,16 @@ def render_mbc(doc, ostream):
|
||||
if not rule["meta"].get("mbc"):
|
||||
continue
|
||||
|
||||
mbcs = rule["meta"]["mbc"]
|
||||
if not isinstance(mbcs, list):
|
||||
raise ValueError("invalid rule: MBC mapping is not a list")
|
||||
|
||||
for mbc in mbcs:
|
||||
objective, _, rest = mbc.partition("::")
|
||||
if "::" in rest:
|
||||
behavior, _, rest = rest.partition("::")
|
||||
method, _, id = rest.rpartition(" ")
|
||||
objectives[objective].add((behavior, method, id))
|
||||
else:
|
||||
behavior, _, id = rest.rpartition(" ")
|
||||
objectives[objective].add((behavior, id))
|
||||
for mbc in rule["meta"]["mbc"]:
|
||||
objectives[mbc["objective"]].add((mbc["behavior"], mbc.get("method"), mbc["id"]))
|
||||
|
||||
for objective, behaviors in sorted(objectives.items()):
|
||||
inner_rows = []
|
||||
for spec in sorted(behaviors):
|
||||
if len(spec) == 2:
|
||||
behavior, id = spec
|
||||
inner_rows.append("%s %s" % (behavior, id))
|
||||
elif len(spec) == 3:
|
||||
behavior, method, id = spec
|
||||
inner_rows.append("%s::%s %s" % (behavior, method, id))
|
||||
for (behavior, method, id) in sorted(behaviors):
|
||||
if method is None:
|
||||
inner_rows.append("%s [%s]" % (behavior, id))
|
||||
else:
|
||||
raise RuntimeError("unexpected MBC spec format")
|
||||
inner_rows.append("%s::%s [%s]" % (behavior, method, id))
|
||||
ostream["MBC"].setdefault(objective.upper(), inner_rows)
|
||||
|
||||
|
||||
@@ -196,7 +169,7 @@ def capa_details(file_path, output_format="dictionary"):
|
||||
capabilities, counts = capa.main.find_capabilities(rules, extractor, disable_progress=True)
|
||||
|
||||
# collect metadata (used only to make rendering more complete)
|
||||
meta = capa.main.collect_metadata("", file_path, RULES_PATH, "auto", extractor)
|
||||
meta = capa.main.collect_metadata("", file_path, RULES_PATH, extractor)
|
||||
meta["analysis"].update(counts)
|
||||
|
||||
capa_output = False
|
||||
|
||||
74
scripts/detect-elf-os.py
Normal file
74
scripts/detect-elf-os.py
Normal file
@@ -0,0 +1,74 @@
|
||||
#!/usr/bin/env python2
|
||||
"""
|
||||
Copyright (C) 2021 FireEye, Inc. All Rights Reserved.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
detect-elf-os
|
||||
|
||||
Attempt to detect the underlying OS that the given ELF file targets.
|
||||
"""
|
||||
import sys
|
||||
import logging
|
||||
import argparse
|
||||
import contextlib
|
||||
from typing import BinaryIO
|
||||
|
||||
import capa.helpers
|
||||
import capa.features.extractors.elf
|
||||
|
||||
logger = logging.getLogger("capa.detect-elf-os")
|
||||
|
||||
|
||||
def main(argv=None):
    """
    Print the OS that the given ELF file targets.

    When running inside IDA (per `capa.helpers.is_runtime_ida()`), the input is
    read through `capa.ida.helpers.IDAIO` and no command line is parsed.
    Otherwise `argv` (default: `sys.argv[1:]`) is parsed for the sample path and
    the `-d/--debug` and `-q/--quiet` logging switches.

    Returns:
      0 when the OS was detected and printed,
      -1 when the ELF parser reports a corrupt file.
    """
    f: BinaryIO

    if capa.helpers.is_runtime_ida():
        from capa.ida.helpers import IDAIO

        f = IDAIO()
    else:
        cli_args = sys.argv[1:] if argv is None else argv

        parser = argparse.ArgumentParser(description="Detect the underlying OS for the given ELF file")
        parser.add_argument("sample", type=str, help="path to ELF file")

        logging_group = parser.add_argument_group("logging arguments")
        logging_group.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
        logging_group.add_argument(
            "-q", "--quiet", action="store_true", help="disable all status output except fatal errors"
        )

        args = parser.parse_args(args=cli_args)

        # --quiet takes precedence over --debug; INFO is the default.
        if args.quiet:
            level = logging.WARNING
        elif args.debug:
            level = logging.DEBUG
        else:
            level = logging.INFO

        logging.basicConfig(level=level)
        logging.getLogger().setLevel(level)

        f = open(args.sample, "rb")

    # whichever stream we ended up with, make sure it is closed on the way out.
    with contextlib.closing(f):
        try:
            detected_os = capa.features.extractors.elf.detect_elf_os(f)
        except capa.features.extractors.elf.CorruptElfFile as e:
            logger.error("corrupt ELF file: %s", str(e.args[0]))
            return -1

        print(detected_os)
        return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # only the standalone command-line run turns main()'s return value into a process exit status.
    if not capa.helpers.is_runtime_ida():
        sys.exit(main())
    else:
        main()
|
||||
351
scripts/lint.py
351
scripts/lint.py
@@ -13,26 +13,37 @@ Unless required by applicable law or agreed to in writing, software distributed
|
||||
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and limitations under the License.
|
||||
"""
|
||||
import gc
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import string
|
||||
import difflib
|
||||
import hashlib
|
||||
import inspect
|
||||
import logging
|
||||
import os.path
|
||||
import pathlib
|
||||
import argparse
|
||||
import itertools
|
||||
import posixpath
|
||||
import contextlib
|
||||
from typing import Set, Dict, List
|
||||
from pathlib import Path
|
||||
from dataclasses import field, dataclass
|
||||
|
||||
import tqdm
|
||||
import termcolor
|
||||
import ruamel.yaml
|
||||
import tqdm.contrib.logging
|
||||
|
||||
import capa.main
|
||||
import capa.rules
|
||||
import capa.engine
|
||||
import capa.features.insn
|
||||
import capa.features.common
|
||||
from capa.rules import Rule, RuleSet
|
||||
from capa.features.common import Feature
|
||||
|
||||
logger = logging.getLogger("lint")
|
||||
|
||||
@@ -49,6 +60,22 @@ def green(s):
|
||||
return termcolor.colored(s, "green")
|
||||
|
||||
|
||||
@dataclass
class Context:
    """
    state shared by all lints during a single linter run.

    attributes:
      samples: mapping from sample identifier to the sample's file path.
        `collect_samples` indexes each file by its sha256, md5, and filename.
      rules: the rule set being inspected.
      is_thorough: when true, the long-running lints are executed too.
      capabilities_by_sample: cache of the rule names matched per sample,
        indexed by file path. starts out empty for each new Context.
    """

    samples: Dict[str, Path]
    rules: RuleSet
    is_thorough: bool
    capabilities_by_sample: Dict[Path, Set[str]] = field(default_factory=dict)
|
||||
|
||||
|
||||
class Lint:
|
||||
WARN = orange("WARN")
|
||||
FAIL = red("FAIL")
|
||||
@@ -57,7 +84,7 @@ class Lint:
|
||||
level = FAIL
|
||||
recommendation = ""
|
||||
|
||||
def check_rule(self, ctx, rule):
|
||||
def check_rule(self, ctx: Context, rule: Rule):
|
||||
return False
|
||||
|
||||
|
||||
@@ -65,7 +92,7 @@ class NameCasing(Lint):
|
||||
name = "rule name casing"
|
||||
recommendation = "Rename rule using to start with lower case letters"
|
||||
|
||||
def check_rule(self, ctx, rule):
|
||||
def check_rule(self, ctx: Context, rule: Rule):
|
||||
return rule.name[0] in string.ascii_uppercase and rule.name[1] not in string.ascii_uppercase
|
||||
|
||||
|
||||
@@ -74,7 +101,7 @@ class FilenameDoesntMatchRuleName(Lint):
|
||||
recommendation = "Rename rule file to match the rule name"
|
||||
recommendation_template = 'Rename rule file to match the rule name, expected: "{:s}", found: "{:s}"'
|
||||
|
||||
def check_rule(self, ctx, rule):
|
||||
def check_rule(self, ctx: Context, rule: Rule):
|
||||
expected = rule.name
|
||||
expected = expected.lower()
|
||||
expected = expected.replace(" ", "-")
|
||||
@@ -96,7 +123,7 @@ class MissingNamespace(Lint):
|
||||
name = "missing rule namespace"
|
||||
recommendation = "Add meta.namespace so that the rule is emitted correctly"
|
||||
|
||||
def check_rule(self, ctx, rule):
|
||||
def check_rule(self, ctx: Context, rule: Rule):
|
||||
return (
|
||||
"namespace" not in rule.meta
|
||||
and not is_nursery_rule(rule)
|
||||
@@ -109,7 +136,7 @@ class NamespaceDoesntMatchRulePath(Lint):
|
||||
name = "file path doesn't match rule namespace"
|
||||
recommendation = "Move rule to appropriate directory or update the namespace"
|
||||
|
||||
def check_rule(self, ctx, rule):
|
||||
def check_rule(self, ctx: Context, rule: Rule):
|
||||
# let the other lints catch namespace issues
|
||||
if "namespace" not in rule.meta:
|
||||
return False
|
||||
@@ -127,7 +154,7 @@ class MissingScope(Lint):
|
||||
name = "missing scope"
|
||||
recommendation = "Add meta.scope so that the scope is explicit (defaults to `function`)"
|
||||
|
||||
def check_rule(self, ctx, rule):
|
||||
def check_rule(self, ctx: Context, rule: Rule):
|
||||
return "scope" not in rule.meta
|
||||
|
||||
|
||||
@@ -135,7 +162,7 @@ class InvalidScope(Lint):
|
||||
name = "invalid scope"
|
||||
recommendation = "Use only file, function, or basic block rule scopes"
|
||||
|
||||
def check_rule(self, ctx, rule):
|
||||
def check_rule(self, ctx: Context, rule: Rule):
|
||||
return rule.meta.get("scope") not in ("file", "function", "basic block")
|
||||
|
||||
|
||||
@@ -143,7 +170,7 @@ class MissingAuthor(Lint):
|
||||
name = "missing author"
|
||||
recommendation = "Add meta.author so that users know who to contact with questions"
|
||||
|
||||
def check_rule(self, ctx, rule):
|
||||
def check_rule(self, ctx: Context, rule: Rule):
|
||||
return "author" not in rule.meta
|
||||
|
||||
|
||||
@@ -151,7 +178,7 @@ class MissingExamples(Lint):
|
||||
name = "missing examples"
|
||||
recommendation = "Add meta.examples so that the rule can be tested and verified"
|
||||
|
||||
def check_rule(self, ctx, rule):
|
||||
def check_rule(self, ctx: Context, rule: Rule):
|
||||
return (
|
||||
"examples" not in rule.meta
|
||||
or not isinstance(rule.meta["examples"], list)
|
||||
@@ -164,7 +191,7 @@ class MissingExampleOffset(Lint):
|
||||
name = "missing example offset"
|
||||
recommendation = "Add offset of example function"
|
||||
|
||||
def check_rule(self, ctx, rule):
|
||||
def check_rule(self, ctx: Context, rule: Rule):
|
||||
if rule.meta.get("scope") in ("function", "basic block"):
|
||||
examples = rule.meta.get("examples")
|
||||
if isinstance(examples, list):
|
||||
@@ -178,7 +205,7 @@ class ExampleFileDNE(Lint):
|
||||
name = "referenced example doesn't exist"
|
||||
recommendation = "Add the referenced example to samples directory ($capa-root/tests/data or supplied via --samples)"
|
||||
|
||||
def check_rule(self, ctx, rule):
|
||||
def check_rule(self, ctx: Context, rule: Rule):
|
||||
if not rule.meta.get("examples"):
|
||||
# let the MissingExamples lint catch this case, don't double report.
|
||||
return False
|
||||
@@ -187,7 +214,7 @@ class ExampleFileDNE(Lint):
|
||||
for example in rule.meta.get("examples", []):
|
||||
if example:
|
||||
example_id = example.partition(":")[0]
|
||||
if example_id in ctx["samples"]:
|
||||
if example_id in ctx.samples:
|
||||
found = True
|
||||
break
|
||||
|
||||
@@ -197,12 +224,42 @@ class ExampleFileDNE(Lint):
|
||||
DEFAULT_SIGNATURES = capa.main.get_default_signatures()
|
||||
|
||||
|
||||
def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]:
    """
    Return the names of the rules in `ctx.rules` that match the sample at `path`.

    Results are cached in `ctx.capabilities_by_sample` (keyed by `path`), so each
    sample is analyzed at most once per Context.
    """
    display_path = os.path.abspath(str(path))

    try:
        cached = ctx.capabilities_by_sample[path]
    except KeyError:
        pass
    else:
        logger.debug("found cached results: %s: %d capabilities", display_path, len(cached))
        return cached

    logger.debug("analyzing sample: %s", display_path)
    extractor = capa.main.get_extractor(
        display_path, "auto", capa.main.BACKEND_VIV, DEFAULT_SIGNATURES, False, disable_progress=True
    )

    matches, _ = capa.main.find_capabilities(ctx.rules, extractor, disable_progress=True)

    # only the matched rule names are kept; drop the full match results right away
    # so they are not alive when the collector runs below.
    matched_names: Set[str] = set(matches.keys())
    del matches

    logger.debug("computed results: %s: %d capabilities", display_path, len(matched_names))
    ctx.capabilities_by_sample[path] = matched_names

    # when i (wb) run the linter in thorough mode locally,
    # the OS occasionally kills the process due to memory usage.
    # so, be extra aggressive in keeping memory usage down.
    #
    # tbh, im not sure this actually does anything, but maybe it helps?
    gc.collect()

    return matched_names
|
||||
|
||||
|
||||
class DoesntMatchExample(Lint):
|
||||
name = "doesn't match on referenced example"
|
||||
recommendation = "Fix the rule logic or provide a different example"
|
||||
|
||||
def check_rule(self, ctx, rule):
|
||||
if not ctx["is_thorough"]:
|
||||
def check_rule(self, ctx: Context, rule: Rule):
|
||||
if not ctx.is_thorough:
|
||||
return False
|
||||
|
||||
examples = rule.meta.get("examples", [])
|
||||
@@ -212,19 +269,16 @@ class DoesntMatchExample(Lint):
|
||||
for example in examples:
|
||||
example_id = example.partition(":")[0]
|
||||
try:
|
||||
path = ctx["samples"][example_id]
|
||||
path = ctx.samples[example_id]
|
||||
except KeyError:
|
||||
# lint ExampleFileDNE will catch this.
|
||||
# don't double report.
|
||||
continue
|
||||
|
||||
try:
|
||||
extractor = capa.main.get_extractor(
|
||||
path, "auto", capa.main.BACKEND_VIV, DEFAULT_SIGNATURES, False, disable_progress=True
|
||||
)
|
||||
capabilities, meta = capa.main.find_capabilities(ctx["rules"], extractor, disable_progress=True)
|
||||
capabilities = get_sample_capabilities(ctx, path)
|
||||
except Exception as e:
|
||||
logger.error("failed to extract capabilities: %s %s %s", rule.name, path, e)
|
||||
logger.error("failed to extract capabilities: %s %s %s", rule.name, str(path), e, exc_info=True)
|
||||
return True
|
||||
|
||||
if rule.name not in capabilities:
|
||||
@@ -237,7 +291,7 @@ class StatementWithSingleChildStatement(Lint):
|
||||
recommendation_template = "remove the superfluous parent statement: {:s}"
|
||||
violation = False
|
||||
|
||||
def check_rule(self, ctx, rule):
|
||||
def check_rule(self, ctx: Context, rule: Rule):
|
||||
self.violation = False
|
||||
|
||||
def rec(statement, is_root=False):
|
||||
@@ -260,7 +314,7 @@ class OrStatementWithAlwaysTrueChild(Lint):
|
||||
recommendation_template = "clarify the rule logic, e.g. by moving the always True child statement: {:s}"
|
||||
violation = False
|
||||
|
||||
def check_rule(self, ctx, rule):
|
||||
def check_rule(self, ctx: Context, rule: Rule):
|
||||
self.violation = False
|
||||
|
||||
def rec(statement):
|
||||
@@ -283,7 +337,7 @@ class UnusualMetaField(Lint):
|
||||
recommendation = "Remove the meta field"
|
||||
recommendation_template = 'Remove the meta field: "{:s}"'
|
||||
|
||||
def check_rule(self, ctx, rule):
|
||||
def check_rule(self, ctx: Context, rule: Rule):
|
||||
for key in rule.meta.keys():
|
||||
if key in capa.rules.META_KEYS:
|
||||
continue
|
||||
@@ -299,7 +353,7 @@ class LibRuleNotInLibDirectory(Lint):
|
||||
name = "lib rule not found in lib directory"
|
||||
recommendation = "Move the rule to the `lib` subdirectory of the rules path"
|
||||
|
||||
def check_rule(self, ctx, rule):
|
||||
def check_rule(self, ctx: Context, rule: Rule):
|
||||
if is_nursery_rule(rule):
|
||||
return False
|
||||
|
||||
@@ -313,7 +367,7 @@ class LibRuleHasNamespace(Lint):
|
||||
name = "lib rule has a namespace"
|
||||
recommendation = "Remove the namespace from the rule"
|
||||
|
||||
def check_rule(self, ctx, rule):
|
||||
def check_rule(self, ctx: Context, rule: Rule):
|
||||
if "lib" not in rule.meta:
|
||||
return False
|
||||
|
||||
@@ -324,9 +378,10 @@ class FeatureStringTooShort(Lint):
|
||||
name = "feature string too short"
|
||||
recommendation = 'capa only extracts strings with length >= 4; will not match on "{:s}"'
|
||||
|
||||
def check_features(self, ctx, features):
|
||||
def check_features(self, ctx: Context, features: List[Feature]):
|
||||
for feature in features:
|
||||
if isinstance(feature, capa.features.common.String):
|
||||
if isinstance(feature, (capa.features.common.String, capa.features.common.Substring)):
|
||||
assert isinstance(feature.value, str)
|
||||
if len(feature.value) < 4:
|
||||
self.recommendation = self.recommendation.format(feature.value)
|
||||
return True
|
||||
@@ -341,9 +396,10 @@ class FeatureNegativeNumber(Lint):
|
||||
'representation; will not match on "{:d}"'
|
||||
)
|
||||
|
||||
def check_features(self, ctx, features):
|
||||
def check_features(self, ctx: Context, features: List[Feature]):
|
||||
for feature in features:
|
||||
if isinstance(feature, (capa.features.insn.Number,)):
|
||||
assert isinstance(feature.value, int)
|
||||
if feature.value < 0:
|
||||
self.recommendation = self.recommendation_template.format(feature.value)
|
||||
return True
|
||||
@@ -353,17 +409,61 @@ class FeatureNegativeNumber(Lint):
|
||||
class FeatureNtdllNtoskrnlApi(Lint):
    """
    warn when a rule requires an API specifically from `ntdll` or `ntoskrnl`,
    unless the routine is known to be exported by only that module.
    """

    name = "feature api may overlap with ntdll and ntoskrnl"
    level = Lint.WARN
    recommendation_template = (
        "check if {:s} is exported by both ntdll and ntoskrnl; if true, consider removing {:s} "
        "module requirement to improve detection"
    )

    # ntoskrnl.exe does not export these routines,
    # so requiring the ntdll module for them is fine.
    # built once here rather than on every feature inspected.
    _NTDLL_ONLY = frozenset(
        (
            "LdrGetProcedureAddress",
            "LdrLoadDll",
            "NtCreateThread",
            "NtLoadDriver",
            "NtQueryDirectoryObject",
            "NtResumeThread",
            "NtSuspendThread",
            "NtTerminateProcess",
            "NtWriteVirtualMemory",
            "RtlGetNativeSystemInformation",
            "NtCreateThreadEx",
            "NtCreateUserProcess",
            "NtOpenDirectoryObject",
            "NtQueueApcThread",
            "ZwResumeThread",
            "ZwSuspendThread",
            "ZwWriteVirtualMemory",
            "NtCreateProcess",
            "ZwCreateThread",
            "NtCreateProcessEx",
            "ZwCreateThreadEx",
            "ZwCreateProcess",
            "ZwCreateUserProcess",
            "RtlCreateUserProcess",
        )
    )

    # ntdll.dll does not export these routines,
    # so requiring the ntoskrnl module for them is fine.
    _NTOSKRNL_ONLY = frozenset(
        (
            "PsGetVersion",
            "PsLookupProcessByProcessId",
            "KeStackAttachProcess",
            "ObfDereferenceObject",
            "KeUnstackDetachProcess",
        )
    )

    def check_features(self, ctx: Context, features: List[Feature]):
        """
        Return True (and set `self.recommendation`) for the first API feature whose
        module is `ntdll` or `ntoskrnl` and whose routine is not known to be
        exclusive to that module; return False if there is none.
        """
        for feature in features:
            if not isinstance(feature, capa.features.insn.API):
                continue

            assert isinstance(feature.value, str)
            # an API without a module prefix yields an empty modname and is ignored below.
            modname, _, impname = feature.value.rpartition(".")

            if modname == "ntdll" and impname in self._NTDLL_ONLY:
                continue

            if modname == "ntoskrnl" and impname in self._NTOSKRNL_ONLY:
                continue

            if modname in ("ntdll", "ntoskrnl"):
                # format from the template each time so an earlier match never leaks into this message.
                self.recommendation = self.recommendation_template.format(impname, modname)
                return True

        return False
|
||||
|
||||
@@ -372,7 +472,7 @@ class FormatLineFeedEOL(Lint):
|
||||
name = "line(s) end with CRLF (\\r\\n)"
|
||||
recommendation = "convert line endings to LF (\\n) for example using dos2unix"
|
||||
|
||||
def check_rule(self, ctx: Context, rule: Rule):
    """
    Return True (a violation) when the rule's text contains a CRLF line ending.

    Args:
      ctx: lint context; not used by this check.
      rule: the rule to inspect; only `rule.definition` (its text) is read.
    """
    # the previous test, `len(definition.split("\r\n")) > 0`, holds for every string
    # because `str.split` always returns at least one element, so this lint
    # reported "no violation" unconditionally. check for the sequence directly.
    return "\r\n" in rule.definition
|
||||
@@ -382,7 +482,7 @@ class FormatSingleEmptyLineEOF(Lint):
|
||||
name = "EOF format"
|
||||
recommendation = "end file with a single empty line"
|
||||
|
||||
def check_rule(self, ctx: Context, rule: Rule):
    """
    Return True (a violation) unless the rule's text ends with exactly one newline.

    Text with no trailing newline, and text ending in two or more newlines,
    are both reported.
    """
    text = rule.definition
    ends_with_single_newline = text.endswith("\n") and not text.endswith("\n\n")
    return not ends_with_single_newline
|
||||
@@ -392,7 +492,7 @@ class FormatIncorrect(Lint):
|
||||
name = "rule format incorrect"
|
||||
recommendation_template = "use scripts/capafmt.py or adjust as follows\n{:s}"
|
||||
|
||||
def check_rule(self, ctx, rule):
|
||||
def check_rule(self, ctx: Context, rule: Rule):
|
||||
actual = rule.definition
|
||||
expected = capa.rules.Rule.from_yaml(rule.definition, use_ruamel=True).to_yaml()
|
||||
|
||||
@@ -412,36 +512,53 @@ class FormatIncorrect(Lint):
|
||||
class FormatStringQuotesIncorrect(Lint):
    """
    flag `string` and `substring` values that are not written with double quotes.
    """

    name = "rule string quotes incorrect"

    def check_rule(self, ctx: Context, rule: Rule):
        """
        Walk the YAML parse events of the rule's text and return True (setting
        `self.recommendation`) at the first `string`/`substring` scalar that is
        unquoted or single-quoted; return False when none is found.
        """
        events = capa.rules.Rule._get_ruamel_yaml_parser().parse(rule.definition)
        for key in events:
            if not isinstance(key, ruamel.yaml.ScalarEvent):
                continue
            if key.value not in ("string", "substring"):
                continue

            value = next(events)  # assume value is next event
            if not isinstance(value, ruamel.yaml.ScalarEvent):
                # ignore non-scalar
                continue

            # only `string` features are exempted when written as a regex; `substring` is always checked.
            if key.value == "string" and value.value.startswith("/") and value.value.endswith(("/", "/i")):
                # ignore regex for now
                continue

            if value.style is None:
                # no quotes
                self.recommendation = 'add double quotes to "%s"' % value.value
                return True

            if value.style == "'":
                # single quote
                self.recommendation = 'change single quotes to double quotes for "%s"' % value.value
                return True

        return False
|
||||
|
||||
|
||||
def run_lints(lints, ctx: Context, rule: Rule):
    """
    Lazily yield each lint in `lints` whose `check_rule(ctx, rule)` reports a violation (a truthy result).
    """
    yield from (candidate for candidate in lints if candidate.check_rule(ctx, rule))
|
||||
|
||||
|
||||
def run_feature_lints(lints, ctx: Context, features: List[Feature]):
    """
    Lazily yield each lint in `lints` whose `check_features(ctx, features)` reports a violation (a truthy result).
    """
    yield from (candidate for candidate in lints if candidate.check_features(ctx, features))
|
||||
@@ -453,7 +570,7 @@ NAME_LINTS = (
|
||||
)
|
||||
|
||||
|
||||
def lint_name(ctx, rule):
|
||||
def lint_name(ctx: Context, rule: Rule):
|
||||
return run_lints(NAME_LINTS, ctx, rule)
|
||||
|
||||
|
||||
@@ -463,7 +580,7 @@ SCOPE_LINTS = (
|
||||
)
|
||||
|
||||
|
||||
def lint_scope(ctx, rule):
|
||||
def lint_scope(ctx: Context, rule: Rule):
|
||||
return run_lints(SCOPE_LINTS, ctx, rule)
|
||||
|
||||
|
||||
@@ -480,14 +597,14 @@ META_LINTS = (
|
||||
)
|
||||
|
||||
|
||||
def lint_meta(ctx, rule):
|
||||
def lint_meta(ctx: Context, rule: Rule):
|
||||
return run_lints(META_LINTS, ctx, rule)
|
||||
|
||||
|
||||
FEATURE_LINTS = (FeatureStringTooShort(), FeatureNegativeNumber(), FeatureNtdllNtoskrnlApi())
|
||||
|
||||
|
||||
def lint_features(ctx, rule):
|
||||
def lint_features(ctx: Context, rule: Rule):
|
||||
features = get_features(ctx, rule)
|
||||
return run_feature_lints(FEATURE_LINTS, ctx, features)
|
||||
|
||||
@@ -500,7 +617,7 @@ FORMAT_LINTS = (
|
||||
)
|
||||
|
||||
|
||||
def lint_format(ctx, rule):
|
||||
def lint_format(ctx: Context, rule: Rule):
|
||||
return run_lints(FORMAT_LINTS, ctx, rule)
|
||||
|
||||
|
||||
@@ -508,11 +625,11 @@ def get_normpath(path):
|
||||
return posixpath.normpath(path).replace(os.sep, "/")
|
||||
|
||||
|
||||
def get_features(ctx: Context, rule: Rule):
    """
    Collect the features of `rule` together with those of every rule it depends on.

    Dependencies are resolved via `rule.get_dependencies` against the namespaces of
    `ctx.rules`; the rule's own features come first in the returned list.
    """
    # get features from rule and all dependencies including subscopes and matched rules
    ruleset = ctx.rules
    related = [rule]
    related.extend([ruleset.rules[name] for name in rule.get_dependencies(ruleset.rules_by_namespace)])
    return [feature for member in related for feature in get_rule_features(member)]
|
||||
@@ -539,7 +656,7 @@ LOGIC_LINTS = (
|
||||
)
|
||||
|
||||
|
||||
def lint_logic(ctx, rule):
|
||||
def lint_logic(ctx: Context, rule: Rule):
|
||||
return run_lints(LOGIC_LINTS, ctx, rule)
|
||||
|
||||
|
||||
@@ -552,7 +669,7 @@ def is_nursery_rule(rule):
|
||||
return rule.meta.get("capa/nursery")
|
||||
|
||||
|
||||
def lint_rule(ctx, rule):
|
||||
def lint_rule(ctx: Context, rule: Rule):
|
||||
logger.debug(rule.name)
|
||||
|
||||
violations = list(
|
||||
@@ -567,30 +684,34 @@ def lint_rule(ctx, rule):
|
||||
)
|
||||
|
||||
if len(violations) > 0:
|
||||
category = rule.meta.get("rule-category")
|
||||
# don't show nursery rules with a single violation: needs examples.
|
||||
# this is by far the most common reason to be in the nursery,
|
||||
# and ends up just producing a lot of noise.
|
||||
if not (is_nursery_rule(rule) and len(violations) == 1 and violations[0].name == "missing examples"):
|
||||
category = rule.meta.get("rule-category")
|
||||
|
||||
print("")
|
||||
print(
|
||||
"%s%s %s"
|
||||
% (
|
||||
" (nursery) " if is_nursery_rule(rule) else "",
|
||||
rule.name,
|
||||
("(%s)" % category) if category else "",
|
||||
)
|
||||
)
|
||||
|
||||
for violation in violations:
|
||||
print("")
|
||||
print(
|
||||
"%s %s: %s: %s"
|
||||
"%s%s %s"
|
||||
% (
|
||||
" " if is_nursery_rule(rule) else "",
|
||||
Lint.WARN if is_nursery_rule(rule) else violation.level,
|
||||
violation.name,
|
||||
violation.recommendation,
|
||||
" (nursery) " if is_nursery_rule(rule) else "",
|
||||
rule.name,
|
||||
("(%s)" % category) if category else "",
|
||||
)
|
||||
)
|
||||
|
||||
print("")
|
||||
for violation in violations:
|
||||
print(
|
||||
"%s %s: %s: %s"
|
||||
% (
|
||||
" " if is_nursery_rule(rule) else "",
|
||||
Lint.WARN if is_nursery_rule(rule) else violation.level,
|
||||
violation.name,
|
||||
violation.recommendation,
|
||||
)
|
||||
)
|
||||
|
||||
print("")
|
||||
|
||||
if is_nursery_rule(rule):
|
||||
has_examples = not any(map(lambda v: v.level == Lint.FAIL and v.name == "missing examples", violations))
|
||||
@@ -625,30 +746,62 @@ def lint_rule(ctx, rule):
|
||||
return (lints_failed, lints_warned)
|
||||
|
||||
|
||||
def lint(ctx, rules):
|
||||
"""
|
||||
Args:
|
||||
samples (Dict[string, string]): map from sample id to path.
|
||||
for each sample, record sample id of sha256, md5, and filename.
|
||||
see `collect_samples(path)`.
|
||||
rules (List[Rule]): the rules to lint.
|
||||
def width(s, count):
    """
    Fit `s` into a column of `count` characters.

    Strings that fit are left-justified and padded with spaces;
    longer strings are cut to their first `count - 3` characters followed by "...".
    """
    if len(s) <= count:
        return s.ljust(count)
    return s[: count - 3] + "..."
|
||||
|
||||
|
||||
@contextlib.contextmanager
def redirecting_print_to_tqdm():
    """
    tqdm (progress bar) expects to have fairly tight control over console output.
    so calls to `print()` will break the progress bar and make things look bad.
    so, this context manager temporarily replaces the builtin `print` with one
    that writes through `tqdm.tqdm.write`, and restores the original on exit,
    even when the body raises.

    via: https://stackoverflow.com/a/42424890/87207
    """
    # reach the builtins module directly rather than through `inspect.builtins`,
    # which is not a documented attribute of `inspect`.
    import builtins

    old_print = builtins.print

    def new_print(*args, **kwargs):
        # if tqdm.tqdm.write raises an error, fall back to the builtin print.
        # catch Exception only, so that KeyboardInterrupt and SystemExit still propagate.
        try:
            tqdm.tqdm.write(*args, **kwargs)
        except Exception:
            old_print(*args, **kwargs)

    try:
        # globally replace print with new_print
        builtins.print = new_print
        yield
    finally:
        builtins.print = old_print
|
||||
|
||||
|
||||
def lint(ctx: Context):
    """
    Lint every rule in `ctx.rules`, skipping subscope rules, while showing a progress bar.

    Returns: Dict[string, Tuple(int, int)]
      maps each rule name to the result of `lint_rule`:
      - # lints failed
      - # lints warned
    """
    results = {}

    progress = tqdm.contrib.logging.tqdm_logging_redirect(ctx.rules.rules.items(), unit="rule")
    # route both logging and print() output through tqdm while the bar is active.
    with progress as pbar, redirecting_print_to_tqdm():
        for name, rule in pbar:
            is_subscope_rule = rule.meta.get("capa/subscope-rule", False)
            if not is_subscope_rule:
                pbar.set_description(width("linting rule: %s" % (name), 48))
                results[name] = lint_rule(ctx, rule)

    return results
|
||||
|
||||
|
||||
def collect_samples(path):
|
||||
def collect_samples(path) -> Dict[str, Path]:
|
||||
"""
|
||||
recurse through the given path, collecting all file paths, indexed by their content sha256, md5, and filename.
|
||||
"""
|
||||
@@ -666,10 +819,10 @@ def collect_samples(path):
|
||||
if name.endswith(".fnames"):
|
||||
continue
|
||||
|
||||
path = os.path.join(root, name)
|
||||
path = pathlib.Path(os.path.join(root, name))
|
||||
|
||||
try:
|
||||
with open(path, "rb") as f:
|
||||
with path.open("rb") as f:
|
||||
buf = f.read()
|
||||
except IOError:
|
||||
continue
|
||||
@@ -736,13 +889,9 @@ def main(argv=None):
|
||||
|
||||
samples = collect_samples(args.samples)
|
||||
|
||||
ctx = {
|
||||
"samples": samples,
|
||||
"rules": rules,
|
||||
"is_thorough": args.thorough,
|
||||
}
|
||||
ctx = Context(samples=samples, rules=rules, is_thorough=args.thorough)
|
||||
|
||||
results_by_name = lint(ctx, rules)
|
||||
results_by_name = lint(ctx)
|
||||
failed_rules = []
|
||||
warned_rules = []
|
||||
for name, (fail_count, warn_count) in results_by_name.items():
|
||||
|
||||
@@ -105,7 +105,7 @@ def main(argv=None):
|
||||
|
||||
analyzers = []
|
||||
for sigpath in args.signatures:
|
||||
sigs = capa.main.load_flirt_signature(sigpath)
|
||||
sigs = viv_utils.flirt.load_flirt_signature(sigpath)
|
||||
|
||||
with capa.main.timing("flirt: compiling sigs"):
|
||||
matcher = flirt.compile(sigs)
|
||||
|
||||
73
scripts/profile-memory.py
Normal file
73
scripts/profile-memory.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import gc
|
||||
import linecache
|
||||
import tracemalloc
|
||||
|
||||
tracemalloc.start()
|
||||
|
||||
|
||||
def display_top(snapshot, key_type="lineno", limit=10):
    """
    Print the `limit` largest entries of a tracemalloc snapshot, grouped by `key_type`,
    followed by a summary of the remaining entries and the total size.

    Traces from the import machinery and from unknown files are excluded.
    """
    # via: https://docs.python.org/3/library/tracemalloc.html#pretty-top
    excluded = tuple(
        tracemalloc.Filter(False, pattern)
        for pattern in (
            "<frozen importlib._bootstrap_external>",
            "<frozen importlib._bootstrap>",
            "<unknown>",
        )
    )
    stats = snapshot.filter_traces(excluded).statistics(key_type)
    leading, remainder = stats[:limit], stats[limit:]

    print("Top %s lines" % limit)
    for rank, entry in enumerate(leading, 1):
        # report the first frame of the entry's traceback
        frame = entry.traceback[0]
        print("#%s: %s:%s: %.1f KiB" % (rank, frame.filename, frame.lineno, entry.size / 1024))
        source_line = linecache.getline(frame.filename, frame.lineno).strip()
        if source_line:
            print(" %s" % source_line)

    if remainder:
        remainder_size = sum(entry.size for entry in remainder)
        print("%s other: %.1f KiB" % (len(remainder), remainder_size / 1024))

    grand_total = sum(entry.size for entry in stats)
    print("Total allocated size: %.1f KiB" % (grand_total / 1024))
|
||||
|
||||
|
||||
def main():
    """
    Run `capa.main.main()` repeatedly and report timing and memory usage.

    The number of iterations comes from the env var CAPA_PROFILE_COUNT (default 1).
    After each iteration the duration and the process's rss/vms are printed;
    at the end, the top tracemalloc allocation sites are displayed.
    """
    # import within main to keep isort happy
    # while also invoking tracemalloc.start() immediately upon start.
    import io
    import os
    import time
    import contextlib

    import psutil

    import capa.main

    count = int(os.environ.get("CAPA_PROFILE_COUNT", 1))
    print("total iterations planned: %d (set via env var CAPA_PROFILE_COUNT)." % (count))
    print()

    for iteration in range(1, count + 1):
        print("iteration %d/%d..." % (iteration, count))

        # swallow whatever capa writes to stdout/stderr so that only the profile report is shown.
        with contextlib.redirect_stdout(io.StringIO()), contextlib.redirect_stderr(io.StringIO()):
            started = time.time()
            capa.main.main()
            finished = time.time()

            gc.collect()

        process = psutil.Process(os.getpid())
        print(" duration: %0.02fs" % (finished - started))
        print(" rss: %.1f MiB" % (process.memory_info().rss / 1024 / 1024))
        print(" vms: %.1f MiB" % (process.memory_info().vms / 1024 / 1024))

    print("done.")
    gc.collect()

    final_snapshot = tracemalloc.take_snapshot()
    display_top(final_snapshot)
|
||||
|
||||
|
||||
main()
|
||||
@@ -171,7 +171,7 @@ def main(argv=None):
|
||||
logger.error("-" * 80)
|
||||
return -1
|
||||
|
||||
meta = capa.main.collect_metadata(argv, args.sample, args.rules, format, extractor)
|
||||
meta = capa.main.collect_metadata(argv, args.sample, args.rules, extractor)
|
||||
capabilities, counts = capa.main.find_capabilities(rules, extractor)
|
||||
meta["analysis"].update(counts)
|
||||
|
||||
|
||||
@@ -75,6 +75,7 @@ import capa.rules
|
||||
import capa.engine
|
||||
import capa.helpers
|
||||
import capa.features
|
||||
import capa.features.common
|
||||
import capa.features.freeze
|
||||
|
||||
logger = logging.getLogger("capa.show-features")
|
||||
@@ -202,15 +203,26 @@ def print_features(functions, extractor):
|
||||
logger.debug("skipping library function 0x%x (%s)", function_address, function_name)
|
||||
continue
|
||||
|
||||
print("func: 0x%08x" % (function_address))
|
||||
|
||||
for feature, va in extractor.extract_function_features(f):
|
||||
if capa.features.common.is_global_feature(feature):
|
||||
continue
|
||||
|
||||
print("func: 0x%08x: %s" % (va, feature))
|
||||
|
||||
for bb in extractor.get_basic_blocks(f):
|
||||
for feature, va in extractor.extract_basic_block_features(f, bb):
|
||||
if capa.features.common.is_global_feature(feature):
|
||||
continue
|
||||
|
||||
print("bb : 0x%08x: %s" % (va, feature))
|
||||
|
||||
for insn in extractor.get_instructions(f, bb):
|
||||
for feature, va in extractor.extract_insn_features(f, bb, insn):
|
||||
if capa.features.common.is_global_feature(feature):
|
||||
continue
|
||||
|
||||
try:
|
||||
print("insn: 0x%08x: %s" % (va, feature))
|
||||
except UnicodeEncodeError:
|
||||
|
||||
29
setup.py
29
setup.py
@@ -11,21 +11,22 @@ import os
|
||||
import setuptools
|
||||
|
||||
requirements = [
|
||||
"tqdm==4.61.2",
|
||||
"tqdm==4.62.3",
|
||||
"pyyaml==5.4.1",
|
||||
"tabulate==0.8.9",
|
||||
"colorama==0.4.4",
|
||||
"termcolor==1.1.0",
|
||||
"wcwidth==0.2.5",
|
||||
"ida-settings==2.1.0",
|
||||
"viv-utils[flirt]==0.6.5",
|
||||
"viv-utils[flirt]==0.6.6",
|
||||
"halo==0.0.31",
|
||||
"networkx==2.5.1",
|
||||
"ruamel.yaml==0.17.10",
|
||||
"vivisect==1.0.3",
|
||||
"smda==1.5.18",
|
||||
"pefile==2021.5.24",
|
||||
"ruamel.yaml==0.17.16",
|
||||
"vivisect==1.0.5",
|
||||
"smda==1.6.2",
|
||||
"pefile==2021.9.3",
|
||||
"typing==3.7.4.3",
|
||||
"pyelftools==0.27",
|
||||
]
|
||||
|
||||
# this sets __version__
|
||||
@@ -66,20 +67,22 @@ setuptools.setup(
|
||||
install_requires=requirements,
|
||||
extras_require={
|
||||
"dev": [
|
||||
"pytest==6.2.4",
|
||||
"pytest==6.2.5",
|
||||
"pytest-sugar==0.9.4",
|
||||
"pytest-instafail==0.4.2",
|
||||
"pytest-cov==2.12.1",
|
||||
"pycodestyle==2.7.0",
|
||||
"black==21.7b0",
|
||||
"isort==5.9.2",
|
||||
"black==21.9b0",
|
||||
"isort==5.9.3",
|
||||
"mypy==0.910",
|
||||
"psutil==5.8.0",
|
||||
# type stubs for mypy
|
||||
"types-backports==0.1.3",
|
||||
"types-colorama==0.4.2",
|
||||
"types-PyYAML==5.4.3",
|
||||
"types-tabulate==0.1.1",
|
||||
"types-termcolor==0.1.1",
|
||||
"types-colorama==0.4.3",
|
||||
"types-PyYAML==5.4.10",
|
||||
"types-tabulate==0.8.2",
|
||||
"types-termcolor==1.1.1",
|
||||
"types-psutil==5.8.8",
|
||||
],
|
||||
},
|
||||
zip_safe=False,
|
||||
|
||||
Submodule tests/data updated: afb5249689...5a5fefbb39
@@ -10,6 +10,7 @@
|
||||
import os
|
||||
import os.path
|
||||
import binascii
|
||||
import itertools
|
||||
import contextlib
|
||||
import collections
|
||||
from functools import lru_cache
|
||||
@@ -21,7 +22,19 @@ import capa.features.file
|
||||
import capa.features.insn
|
||||
import capa.features.common
|
||||
import capa.features.basicblock
|
||||
from capa.features.common import ARCH_X32, ARCH_X64
|
||||
from capa.features.common import (
|
||||
OS,
|
||||
OS_LINUX,
|
||||
ARCH_I386,
|
||||
FORMAT_PE,
|
||||
ARCH_AMD64,
|
||||
FORMAT_ELF,
|
||||
OS_WINDOWS,
|
||||
BITNESS_X32,
|
||||
BITNESS_X64,
|
||||
Arch,
|
||||
Format,
|
||||
)
|
||||
|
||||
CD = os.path.dirname(__file__)
|
||||
|
||||
@@ -121,6 +134,13 @@ def get_pefile_extractor(path):
|
||||
return capa.features.extractors.pefile.PefileFeatureExtractor(path)
|
||||
|
||||
|
||||
def extract_global_features(extractor):
|
||||
features = collections.defaultdict(set)
|
||||
for feature, va in extractor.extract_global_features():
|
||||
features[feature].add(va)
|
||||
return features
|
||||
|
||||
|
||||
@lru_cache()
|
||||
def extract_file_features(extractor):
|
||||
features = collections.defaultdict(set)
|
||||
@@ -196,6 +216,8 @@ def get_data_path_by_name(name):
|
||||
return os.path.join(CD, "data", "773290480d5445f11d3dc1b800728966.exe_")
|
||||
elif name.startswith("3b13b"):
|
||||
return os.path.join(CD, "data", "3b13b6f1d7cd14dc4a097a12e2e505c0a4cff495262261e2bfc991df238b9b04.dll_")
|
||||
elif name == "7351f.elf":
|
||||
return os.path.join(CD, "data", "7351f8a40c5450557b24622417fc478d.elf_")
|
||||
else:
|
||||
raise ValueError("unexpected sample fixture: %s" % name)
|
||||
|
||||
@@ -241,6 +263,8 @@ def get_sample_md5_by_name(name):
|
||||
elif name.startswith("3b13b"):
|
||||
# file name is SHA256 hash
|
||||
return "56a6ffe6a02941028cc8235204eef31d"
|
||||
elif name == "7351f.elf":
|
||||
return "7351f8a40c5450557b24622417fc478d"
|
||||
else:
|
||||
raise ValueError("unexpected sample fixture: %s" % name)
|
||||
|
||||
@@ -272,7 +296,10 @@ def resolve_scope(scope):
|
||||
if scope == "file":
|
||||
|
||||
def inner_file(extractor):
|
||||
return extract_file_features(extractor)
|
||||
features = extract_file_features(extractor)
|
||||
for k, vs in extract_global_features(extractor).items():
|
||||
features[k].update(vs)
|
||||
return features
|
||||
|
||||
inner_file.__name__ = scope
|
||||
return inner_file
|
||||
@@ -285,7 +312,10 @@ def resolve_scope(scope):
|
||||
def inner_bb(extractor):
|
||||
f = get_function(extractor, fva)
|
||||
bb = get_basic_block(extractor, f, bbva)
|
||||
return extract_basic_block_features(extractor, f, bb)
|
||||
features = extract_basic_block_features(extractor, f, bb)
|
||||
for k, vs in extract_global_features(extractor).items():
|
||||
features[k].update(vs)
|
||||
return features
|
||||
|
||||
inner_bb.__name__ = scope
|
||||
return inner_bb
|
||||
@@ -295,7 +325,10 @@ def resolve_scope(scope):
|
||||
|
||||
def inner_function(extractor):
|
||||
f = get_function(extractor, va)
|
||||
return extract_function_features(extractor, f)
|
||||
features = extract_function_features(extractor, f)
|
||||
for k, vs in extract_global_features(extractor).items():
|
||||
features[k].update(vs)
|
||||
return features
|
||||
|
||||
inner_function.__name__ = scope
|
||||
return inner_function
|
||||
@@ -379,10 +412,10 @@ FEATURE_PRESENCE_TESTS = sorted(
|
||||
# insn/number: stack adjustments
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xC), False),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Number(0x10), False),
|
||||
# insn/number: arch flavors
|
||||
# insn/number: bitness flavors
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, arch=ARCH_X32), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, arch=ARCH_X64), False),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, bitness=BITNESS_X32), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Number(0xFF, bitness=BITNESS_X64), False),
|
||||
# insn/offset
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x4), True),
|
||||
@@ -395,10 +428,10 @@ FEATURE_PRESENCE_TESTS = sorted(
|
||||
# insn/offset: negative
|
||||
("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x1), True),
|
||||
("mimikatz", "function=0x4011FB", capa.features.insn.Offset(-0x2), True),
|
||||
# insn/offset: arch flavors
|
||||
# insn/offset: bitness flavors
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, arch=ARCH_X32), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, arch=ARCH_X64), False),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, bitness=BITNESS_X32), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.insn.Offset(0x0, bitness=BITNESS_X64), False),
|
||||
# insn/api
|
||||
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContextW"), True),
|
||||
("mimikatz", "function=0x403BAC", capa.features.insn.API("advapi32.CryptAcquireContext"), True),
|
||||
@@ -448,9 +481,10 @@ FEATURE_PRESENCE_TESTS = sorted(
|
||||
("mimikatz", "function=0x40105D", capa.features.common.String("ACR > "), True),
|
||||
("mimikatz", "function=0x40105D", capa.features.common.String("nope"), False),
|
||||
("773290...", "function=0x140001140", capa.features.common.String(r"%s:\\OfficePackagesForWDAG"), True),
|
||||
# insn/regex, issue #262
|
||||
# insn/regex
|
||||
("pma16-01", "function=0x4021B0", capa.features.common.Regex("HTTP/1.0"), True),
|
||||
("pma16-01", "function=0x4021B0", capa.features.common.Regex("www.practicalmalwareanalysis.com"), False),
|
||||
("pma16-01", "function=0x402F40", capa.features.common.Regex("www.practicalmalwareanalysis.com"), True),
|
||||
("pma16-01", "function=0x402F40", capa.features.common.Substring("practicalmalwareanalysis.com"), True),
|
||||
# insn/string, pointer to string
|
||||
("mimikatz", "function=0x44EDEF", capa.features.common.String("INPUTEVENT"), True),
|
||||
# insn/string, direct memory reference
|
||||
@@ -499,6 +533,26 @@ FEATURE_PRESENCE_TESTS = sorted(
|
||||
("mimikatz", "function=0x456BB9", capa.features.common.Characteristic("calls to"), False),
|
||||
# file/function-name
|
||||
("pma16-01", "file", capa.features.file.FunctionName("__aulldiv"), True),
|
||||
# os & format & arch
|
||||
("pma16-01", "file", OS(OS_WINDOWS), True),
|
||||
("pma16-01", "file", OS(OS_LINUX), False),
|
||||
("pma16-01", "function=0x404356", OS(OS_WINDOWS), True),
|
||||
("pma16-01", "function=0x404356,bb=0x4043B9", OS(OS_WINDOWS), True),
|
||||
("pma16-01", "file", Arch(ARCH_I386), True),
|
||||
("pma16-01", "file", Arch(ARCH_AMD64), False),
|
||||
("pma16-01", "function=0x404356", Arch(ARCH_I386), True),
|
||||
("pma16-01", "function=0x404356,bb=0x4043B9", Arch(ARCH_I386), True),
|
||||
("pma16-01", "file", Format(FORMAT_PE), True),
|
||||
("pma16-01", "file", Format(FORMAT_ELF), False),
|
||||
# elf support
|
||||
("7351f.elf", "file", OS(OS_LINUX), True),
|
||||
("7351f.elf", "file", OS(OS_WINDOWS), False),
|
||||
("7351f.elf", "file", Format(FORMAT_ELF), True),
|
||||
("7351f.elf", "file", Format(FORMAT_PE), False),
|
||||
("7351f.elf", "file", Arch(ARCH_I386), False),
|
||||
("7351f.elf", "file", Arch(ARCH_AMD64), True),
|
||||
("7351f.elf", "function=0x408753", capa.features.common.String("/dev/null"), True),
|
||||
("7351f.elf", "function=0x408753,bb=0x408781", capa.features.insn.API("open"), True),
|
||||
],
|
||||
# order tests by (file, item)
|
||||
# so that our LRU cache is most effective.
|
||||
|
||||
@@ -284,6 +284,57 @@ def test_match_matched_rules():
|
||||
assert capa.features.common.MatchedRule("test rule2") in features
|
||||
|
||||
|
||||
def test_substring():
|
||||
rules = [
|
||||
capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
features:
|
||||
- and:
|
||||
- substring: abc
|
||||
"""
|
||||
)
|
||||
),
|
||||
]
|
||||
features, matches = capa.engine.match(
|
||||
capa.rules.topologically_order_rules(rules),
|
||||
{capa.features.common.String("aaaa"): {1}},
|
||||
0x0,
|
||||
)
|
||||
assert capa.features.common.MatchedRule("test rule") not in features
|
||||
|
||||
features, matches = capa.engine.match(
|
||||
capa.rules.topologically_order_rules(rules),
|
||||
{capa.features.common.String("abc"): {1}},
|
||||
0x0,
|
||||
)
|
||||
assert capa.features.common.MatchedRule("test rule") in features
|
||||
|
||||
features, matches = capa.engine.match(
|
||||
capa.rules.topologically_order_rules(rules),
|
||||
{capa.features.common.String("111abc222"): {1}},
|
||||
0x0,
|
||||
)
|
||||
assert capa.features.common.MatchedRule("test rule") in features
|
||||
|
||||
features, matches = capa.engine.match(
|
||||
capa.rules.topologically_order_rules(rules),
|
||||
{capa.features.common.String("111abc"): {1}},
|
||||
0x0,
|
||||
)
|
||||
assert capa.features.common.MatchedRule("test rule") in features
|
||||
|
||||
features, matches = capa.engine.match(
|
||||
capa.rules.topologically_order_rules(rules),
|
||||
{capa.features.common.String("abc222"): {1}},
|
||||
0x0,
|
||||
)
|
||||
assert capa.features.common.MatchedRule("test rule") in features
|
||||
|
||||
|
||||
def test_regex():
|
||||
rules = [
|
||||
capa.rules.Rule.from_yaml(
|
||||
@@ -474,11 +525,11 @@ def test_match_namespace():
|
||||
|
||||
def test_render_number():
|
||||
assert str(capa.features.insn.Number(1)) == "number(0x1)"
|
||||
assert str(capa.features.insn.Number(1, arch=capa.features.common.ARCH_X32)) == "number/x32(0x1)"
|
||||
assert str(capa.features.insn.Number(1, arch=capa.features.common.ARCH_X64)) == "number/x64(0x1)"
|
||||
assert str(capa.features.insn.Number(1, bitness=capa.features.common.BITNESS_X32)) == "number/x32(0x1)"
|
||||
assert str(capa.features.insn.Number(1, bitness=capa.features.common.BITNESS_X64)) == "number/x64(0x1)"
|
||||
|
||||
|
||||
def test_render_offset():
|
||||
assert str(capa.features.insn.Offset(1)) == "offset(0x1)"
|
||||
assert str(capa.features.insn.Offset(1, arch=capa.features.common.ARCH_X32)) == "offset/x32(0x1)"
|
||||
assert str(capa.features.insn.Offset(1, arch=capa.features.common.ARCH_X64)) == "offset/x64(0x1)"
|
||||
assert str(capa.features.insn.Offset(1, bitness=capa.features.common.BITNESS_X32)) == "offset/x32(0x1)"
|
||||
assert str(capa.features.insn.Offset(1, bitness=capa.features.common.BITNESS_X64)) == "offset/x64(0x1)"
|
||||
|
||||
@@ -20,9 +20,12 @@ import capa.features.file
|
||||
)
|
||||
def test_pefile_features(sample, scope, feature, expected):
|
||||
if scope.__name__ != "file":
|
||||
pytest.xfail("pefile only extract file scope features")
|
||||
pytest.xfail("pefile only extracts file scope features")
|
||||
|
||||
if isinstance(feature, capa.features.file.FunctionName):
|
||||
pytest.xfail("pefile only doesn't extract function names")
|
||||
pytest.xfail("pefile doesn't extract function names")
|
||||
|
||||
if ".elf" in sample:
|
||||
pytest.xfail("pefile doesn't handle ELF files")
|
||||
|
||||
fixtures.do_test_feature_presence(fixtures.get_pefile_extractor, sample, scope, feature, expected)
|
||||
|
||||
@@ -15,7 +15,21 @@ import capa.engine
|
||||
import capa.features.common
|
||||
from capa.features.file import FunctionName
|
||||
from capa.features.insn import Number, Offset
|
||||
from capa.features.common import ARCH_X32, ARCH_X64, String
|
||||
from capa.features.common import (
|
||||
OS,
|
||||
OS_LINUX,
|
||||
ARCH_I386,
|
||||
FORMAT_PE,
|
||||
ARCH_AMD64,
|
||||
FORMAT_ELF,
|
||||
OS_WINDOWS,
|
||||
BITNESS_X32,
|
||||
BITNESS_X64,
|
||||
Arch,
|
||||
Format,
|
||||
String,
|
||||
Substring,
|
||||
)
|
||||
|
||||
|
||||
def test_rule_ctor():
|
||||
@@ -517,7 +531,7 @@ def test_invalid_number():
|
||||
)
|
||||
|
||||
|
||||
def test_number_arch():
|
||||
def test_number_bitness():
|
||||
r = capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
@@ -529,13 +543,13 @@ def test_number_arch():
|
||||
"""
|
||||
)
|
||||
)
|
||||
assert r.evaluate({Number(2, arch=ARCH_X32): {1}}) == True
|
||||
assert r.evaluate({Number(2, bitness=BITNESS_X32): {1}}) == True
|
||||
|
||||
assert r.evaluate({Number(2): {1}}) == False
|
||||
assert r.evaluate({Number(2, arch=ARCH_X64): {1}}) == False
|
||||
assert r.evaluate({Number(2, bitness=BITNESS_X64): {1}}) == False
|
||||
|
||||
|
||||
def test_number_arch_symbol():
|
||||
def test_number_bitness_symbol():
|
||||
r = capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
@@ -547,7 +561,7 @@ def test_number_arch_symbol():
|
||||
"""
|
||||
)
|
||||
)
|
||||
assert r.evaluate({Number(2, arch=ARCH_X32, description="some constant"): {1}}) == True
|
||||
assert r.evaluate({Number(2, bitness=BITNESS_X32, description="some constant"): {1}}) == True
|
||||
|
||||
|
||||
def test_offset_symbol():
|
||||
@@ -595,7 +609,7 @@ def test_count_offset_symbol():
|
||||
assert r.evaluate({Offset(0x100, description="symbol name"): {1, 2, 3}}) == True
|
||||
|
||||
|
||||
def test_offset_arch():
|
||||
def test_offset_bitness():
|
||||
r = capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
@@ -607,13 +621,13 @@ def test_offset_arch():
|
||||
"""
|
||||
)
|
||||
)
|
||||
assert r.evaluate({Offset(2, arch=ARCH_X32): {1}}) == True
|
||||
assert r.evaluate({Offset(2, bitness=BITNESS_X32): {1}}) == True
|
||||
|
||||
assert r.evaluate({Offset(2): {1}}) == False
|
||||
assert r.evaluate({Offset(2, arch=ARCH_X64): {1}}) == False
|
||||
assert r.evaluate({Offset(2, bitness=BITNESS_X64): {1}}) == False
|
||||
|
||||
|
||||
def test_offset_arch_symbol():
|
||||
def test_offset_bitness_symbol():
|
||||
r = capa.rules.Rule.from_yaml(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
@@ -625,7 +639,7 @@ def test_offset_arch_symbol():
|
||||
"""
|
||||
)
|
||||
)
|
||||
assert r.evaluate({Offset(2, arch=ARCH_X32, description="some constant"): {1}}) == True
|
||||
assert r.evaluate({Offset(2, bitness=BITNESS_X32, description="some constant"): {1}}) == True
|
||||
|
||||
|
||||
def test_invalid_offset():
|
||||
@@ -734,6 +748,43 @@ def test_string_values_special_characters():
|
||||
assert (String("bye\nbye") in children) == True
|
||||
|
||||
|
||||
def test_substring_feature():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
features:
|
||||
- or:
|
||||
- substring: abc
|
||||
- substring: "def"
|
||||
- substring: "gh\\ni"
|
||||
"""
|
||||
)
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
children = list(r.statement.get_children())
|
||||
assert (Substring("abc") in children) == True
|
||||
assert (Substring("def") in children) == True
|
||||
assert (Substring("gh\ni") in children) == True
|
||||
|
||||
|
||||
def test_substring_description():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
features:
|
||||
- or:
|
||||
- substring: abc
|
||||
description: the start of the alphabet
|
||||
"""
|
||||
)
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
children = list(r.statement.get_children())
|
||||
assert (Substring("abc") in children) == True
|
||||
|
||||
|
||||
def test_regex_values_always_string():
|
||||
rules = [
|
||||
capa.rules.Rule.from_yaml(
|
||||
@@ -944,3 +995,57 @@ def test_function_name_features():
|
||||
assert (FunctionName("strcpy") in children) == True
|
||||
assert (FunctionName("strcmp", description="copy from here to there") in children) == True
|
||||
assert (FunctionName("strdup", description="duplicate a string") in children) == True
|
||||
|
||||
|
||||
def test_os_features():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
scope: file
|
||||
features:
|
||||
- and:
|
||||
- os: windows
|
||||
"""
|
||||
)
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
children = list(r.statement.get_children())
|
||||
assert (OS(OS_WINDOWS) in children) == True
|
||||
assert (OS(OS_LINUX) not in children) == True
|
||||
|
||||
|
||||
def test_format_features():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
scope: file
|
||||
features:
|
||||
- and:
|
||||
- format: pe
|
||||
"""
|
||||
)
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
children = list(r.statement.get_children())
|
||||
assert (Format(FORMAT_PE) in children) == True
|
||||
assert (Format(FORMAT_ELF) not in children) == True
|
||||
|
||||
|
||||
def test_arch_features():
|
||||
rule = textwrap.dedent(
|
||||
"""
|
||||
rule:
|
||||
meta:
|
||||
name: test rule
|
||||
scope: file
|
||||
features:
|
||||
- and:
|
||||
- arch: amd64
|
||||
"""
|
||||
)
|
||||
r = capa.rules.Rule.from_yaml(rule)
|
||||
children = list(r.statement.get_children())
|
||||
assert (Arch(ARCH_AMD64) in children) == True
|
||||
assert (Arch(ARCH_I386) not in children) == True
|
||||
|
||||
Reference in New Issue
Block a user