mirror of
https://github.com/mandiant/capa.git
synced 2025-12-18 10:19:15 -08:00
Compare commits
151 Commits
mr/library
...
codecut
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d89083ab31 | ||
|
|
891fa8aaa3 | ||
|
|
e94147b4c2 | ||
|
|
6fc4567f0c | ||
|
|
3b1a8f5b5a | ||
|
|
f296e7d423 | ||
|
|
3e02b67480 | ||
|
|
536526f61d | ||
|
|
bcd2c3fb35 | ||
|
|
f340b93a02 | ||
|
|
8bd6f8b99a | ||
|
|
8b4d5d3d22 | ||
|
|
bc6e18ed85 | ||
|
|
2426aba223 | ||
|
|
405e189267 | ||
|
|
cfb632edc8 | ||
|
|
70c96a29b4 | ||
|
|
c005de0a0d | ||
|
|
8d42b14b20 | ||
|
|
bad32b91fb | ||
|
|
9716da4765 | ||
|
|
e0784f2e85 | ||
|
|
4a775bab2e | ||
|
|
2de7830f5e | ||
|
|
9d67e133c9 | ||
|
|
fa18b4e201 | ||
|
|
c3c93685e2 | ||
|
|
462e11443e | ||
|
|
32d6181f02 | ||
|
|
6cf944b321 | ||
|
|
369fbc713e | ||
|
|
e3a1dbfac2 | ||
|
|
e5fe935a8e | ||
|
|
233f8dcf9f | ||
|
|
51d606bc0d | ||
|
|
2b46796d08 | ||
|
|
81f7f43b5b | ||
|
|
1f34795fce | ||
|
|
06f0012183 | ||
|
|
55720ddbfd | ||
|
|
893378c10e | ||
|
|
1a82b9d0c5 | ||
|
|
3cbc184020 | ||
|
|
347601a112 | ||
|
|
8a02b0773d | ||
|
|
f11661f8f2 | ||
|
|
518dc3381c | ||
|
|
5c60adaf96 | ||
|
|
4ab8d75629 | ||
|
|
51d852d1b3 | ||
|
|
aa8e4603d1 | ||
|
|
6c61a91778 | ||
|
|
e633e34517 | ||
|
|
9c72c9067b | ||
|
|
168435cf75 | ||
|
|
5fdf7e61e2 | ||
|
|
95fc747e6f | ||
|
|
1f374e4986 | ||
|
|
28c0234339 | ||
|
|
f57f909e68 | ||
|
|
02c359f79f | ||
|
|
4448d612f1 | ||
|
|
d7cf8d1251 | ||
|
|
d1f3e43325 | ||
|
|
83a46265df | ||
|
|
0c64bd4985 | ||
|
|
ed86e5fb1b | ||
|
|
e1c786466a | ||
|
|
959a234f0e | ||
|
|
e57de2beb4 | ||
|
|
9c9b3711c0 | ||
|
|
65e2dac4c4 | ||
|
|
9ad3f06e1d | ||
|
|
201ec07b58 | ||
|
|
c85be8dc72 | ||
|
|
54952feb07 | ||
|
|
379d6ef313 | ||
|
|
28fcd10d2e | ||
|
|
a6481df6c4 | ||
|
|
abe80842cb | ||
|
|
b6763ac5fe | ||
|
|
5a284de438 | ||
|
|
8cfccbcb44 | ||
|
|
01772d0de0 | ||
|
|
f0042157ab | ||
|
|
6a2330c11a | ||
|
|
02b5e11380 | ||
|
|
32c428b989 | ||
|
|
20909c1d95 | ||
|
|
035b4f6ae6 | ||
|
|
cb002567c4 | ||
|
|
46c513c0a9 | ||
|
|
0f0523d2ba | ||
|
|
688841fd3b | ||
|
|
2a6ba62379 | ||
|
|
ca7580d417 | ||
|
|
7c01712843 | ||
|
|
ef02e4fe83 | ||
|
|
d51074385b | ||
|
|
d9ea57d29d | ||
|
|
8b7ec049f4 | ||
|
|
c05e01cc3a | ||
|
|
11bb0c3fbd | ||
|
|
93da346f32 | ||
|
|
3a2056b701 | ||
|
|
915f3b0511 | ||
|
|
cd61983e43 | ||
|
|
9627f7e5c3 | ||
|
|
3ebec9ec2b | ||
|
|
295cd413bb | ||
|
|
03e4778620 | ||
|
|
e8ad207245 | ||
|
|
a31bd2cd15 | ||
|
|
9118946ecb | ||
|
|
7b32706bd4 | ||
|
|
c632d594a6 | ||
|
|
4398b8ac31 | ||
|
|
ec697c01f9 | ||
|
|
097ed73ccd | ||
|
|
4e121ae24f | ||
|
|
322e7a934e | ||
|
|
7d983af907 | ||
|
|
77758e8922 | ||
|
|
296255f581 | ||
|
|
0237059cbd | ||
|
|
3241ee599f | ||
|
|
24236dda0e | ||
|
|
d4d856767d | ||
|
|
35767e6c6a | ||
|
|
7d8ee6aaac | ||
|
|
23709c9d6a | ||
|
|
bc72b6d14e | ||
|
|
13b1e533f5 | ||
|
|
7cc3ddd4ea | ||
|
|
20ae098cda | ||
|
|
2987eeb0ac | ||
|
|
cebf8e7274 | ||
|
|
d74225b5e0 | ||
|
|
70610cd1c5 | ||
|
|
338107cf9e | ||
|
|
6b88eed1e4 | ||
|
|
54badc323d | ||
|
|
2e2e1bc277 | ||
|
|
84c9da09e0 | ||
|
|
b2f89695b5 | ||
|
|
bc91171c65 | ||
|
|
69190dfa82 | ||
|
|
688afab087 | ||
|
|
6447319cc7 | ||
|
|
7be6fe6ae1 | ||
|
|
ca7073ce87 |
@@ -1,6 +1,6 @@
|
|||||||
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.233.0/containers/python-3/.devcontainer/base.Dockerfile
|
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.233.0/containers/python-3/.devcontainer/base.Dockerfile
|
||||||
|
|
||||||
# [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3.9, 3.8, 3.7, 3.6, 3-bullseye, 3.10-bullseye, 3.9-bullseye, 3.8-bullseye, 3.7-bullseye, 3.6-bullseye, 3-buster, 3.10-buster, 3.9-buster, 3.8-buster, 3.7-buster, 3.6-buster
|
# [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3-bullseye, 3.10-bullseye, 3-buster, 3.10-buster, etc.
|
||||||
ARG VARIANT="3.10-bullseye"
|
ARG VARIANT="3.10-bullseye"
|
||||||
FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT}
|
FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT}
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
"dockerfile": "Dockerfile",
|
"dockerfile": "Dockerfile",
|
||||||
"context": "..",
|
"context": "..",
|
||||||
"args": {
|
"args": {
|
||||||
// Update 'VARIANT' to pick a Python version: 3, 3.10, 3.9, 3.8, 3.7, 3.6
|
// Update 'VARIANT' to pick a Python version: 3, 3.10, etc.
|
||||||
// Append -bullseye or -buster to pin to an OS version.
|
// Append -bullseye or -buster to pin to an OS version.
|
||||||
// Use -bullseye variants on local on arm64/Apple Silicon.
|
// Use -bullseye variants on local on arm64/Apple Silicon.
|
||||||
"VARIANT": "3.10",
|
"VARIANT": "3.10",
|
||||||
|
|||||||
13
.github/workflows/build.yml
vendored
13
.github/workflows/build.yml
vendored
@@ -21,26 +21,25 @@ jobs:
|
|||||||
# set to false for debugging
|
# set to false for debugging
|
||||||
fail-fast: true
|
fail-fast: true
|
||||||
matrix:
|
matrix:
|
||||||
# using Python 3.8 to support running across multiple operating systems including Windows 7
|
|
||||||
include:
|
include:
|
||||||
- os: ubuntu-20.04
|
- os: ubuntu-20.04
|
||||||
# use old linux so that the shared library versioning is more portable
|
# use old linux so that the shared library versioning is more portable
|
||||||
artifact_name: capa
|
artifact_name: capa
|
||||||
asset_name: linux
|
asset_name: linux
|
||||||
python_version: 3.8
|
python_version: '3.10'
|
||||||
- os: ubuntu-20.04
|
- os: ubuntu-20.04
|
||||||
artifact_name: capa
|
artifact_name: capa
|
||||||
asset_name: linux-py312
|
asset_name: linux-py312
|
||||||
python_version: 3.12
|
python_version: '3.12'
|
||||||
- os: windows-2019
|
- os: windows-2019
|
||||||
artifact_name: capa.exe
|
artifact_name: capa.exe
|
||||||
asset_name: windows
|
asset_name: windows
|
||||||
python_version: 3.8
|
python_version: '3.10'
|
||||||
- os: macos-12
|
- os: macos-13
|
||||||
# use older macOS for assumed better portability
|
# use older macOS for assumed better portability
|
||||||
artifact_name: capa
|
artifact_name: capa
|
||||||
asset_name: macos
|
asset_name: macos
|
||||||
python_version: 3.8
|
python_version: '3.10'
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa
|
- name: Checkout capa
|
||||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||||
@@ -107,7 +106,7 @@ jobs:
|
|||||||
# upload zipped binaries to Release page
|
# upload zipped binaries to Release page
|
||||||
if: github.event_name == 'release'
|
if: github.event_name == 'release'
|
||||||
name: zip and upload ${{ matrix.asset_name }}
|
name: zip and upload ${{ matrix.asset_name }}
|
||||||
runs-on: ubuntu-20.04
|
runs-on: ubuntu-latest
|
||||||
needs: [build]
|
needs: [build]
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
|
|||||||
7
.github/workflows/changelog.yml
vendored
7
.github/workflows/changelog.yml
vendored
@@ -13,8 +13,11 @@ permissions:
|
|||||||
jobs:
|
jobs:
|
||||||
check_changelog:
|
check_changelog:
|
||||||
# no need to check for dependency updates via dependabot
|
# no need to check for dependency updates via dependabot
|
||||||
if: github.actor != 'dependabot[bot]' && github.actor != 'dependabot-preview[bot]'
|
# github.event.pull_request.user.login refers to PR author
|
||||||
runs-on: ubuntu-20.04
|
if: |
|
||||||
|
github.event.pull_request.user.login != 'dependabot[bot]' &&
|
||||||
|
github.event.pull_request.user.login != 'dependabot-preview[bot]'
|
||||||
|
runs-on: ubuntu-latest
|
||||||
env:
|
env:
|
||||||
NO_CHANGELOG: '[x] No CHANGELOG update needed'
|
NO_CHANGELOG: '[x] No CHANGELOG update needed'
|
||||||
steps:
|
steps:
|
||||||
|
|||||||
2
.github/workflows/publish.yml
vendored
2
.github/workflows/publish.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
|||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
||||||
with:
|
with:
|
||||||
python-version: '3.8'
|
python-version: '3.10'
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
|
|||||||
2
.github/workflows/tag.yml
vendored
2
.github/workflows/tag.yml
vendored
@@ -9,7 +9,7 @@ permissions: read-all
|
|||||||
jobs:
|
jobs:
|
||||||
tag:
|
tag:
|
||||||
name: Tag capa rules
|
name: Tag capa rules
|
||||||
runs-on: ubuntu-20.04
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa-rules
|
- name: Checkout capa-rules
|
||||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||||
|
|||||||
30
.github/workflows/tests.yml
vendored
30
.github/workflows/tests.yml
vendored
@@ -26,7 +26,7 @@ env:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
changelog_format:
|
changelog_format:
|
||||||
runs-on: ubuntu-20.04
|
runs-on: ubuntu-22.04
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa
|
- name: Checkout capa
|
||||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||||
@@ -37,15 +37,15 @@ jobs:
|
|||||||
if [ $number != 1 ]; then exit 1; fi
|
if [ $number != 1 ]; then exit 1; fi
|
||||||
|
|
||||||
code_style:
|
code_style:
|
||||||
runs-on: ubuntu-20.04
|
runs-on: ubuntu-22.04
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa
|
- name: Checkout capa
|
||||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||||
# use latest available python to take advantage of best performance
|
# use latest available python to take advantage of best performance
|
||||||
- name: Set up Python 3.11
|
- name: Set up Python 3.12
|
||||||
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: "3.12"
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
@@ -64,16 +64,16 @@ jobs:
|
|||||||
run: pre-commit run deptry --hook-stage manual
|
run: pre-commit run deptry --hook-stage manual
|
||||||
|
|
||||||
rule_linter:
|
rule_linter:
|
||||||
runs-on: ubuntu-20.04
|
runs-on: ubuntu-22.04
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa with submodules
|
- name: Checkout capa with submodules
|
||||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||||
with:
|
with:
|
||||||
submodules: recursive
|
submodules: recursive
|
||||||
- name: Set up Python 3.11
|
- name: Set up Python 3.12
|
||||||
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
|
||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: "3.12"
|
||||||
- name: Install capa
|
- name: Install capa
|
||||||
run: |
|
run: |
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
@@ -88,17 +88,17 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
os: [ubuntu-20.04, windows-2019, macos-12]
|
os: [ubuntu-20.04, windows-2019, macos-13]
|
||||||
# across all operating systems
|
# across all operating systems
|
||||||
python-version: ["3.8", "3.11"]
|
python-version: ["3.10", "3.11"]
|
||||||
include:
|
include:
|
||||||
# on Ubuntu run these as well
|
# on Ubuntu run these as well
|
||||||
- os: ubuntu-20.04
|
|
||||||
python-version: "3.8"
|
|
||||||
- os: ubuntu-20.04
|
|
||||||
python-version: "3.9"
|
|
||||||
- os: ubuntu-20.04
|
- os: ubuntu-20.04
|
||||||
python-version: "3.10"
|
python-version: "3.10"
|
||||||
|
- os: ubuntu-20.04
|
||||||
|
python-version: "3.11"
|
||||||
|
- os: ubuntu-20.04
|
||||||
|
python-version: "3.12"
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa with submodules
|
- name: Checkout capa with submodules
|
||||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||||
@@ -131,7 +131,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ["3.9", "3.11"]
|
python-version: ["3.10", "3.11"]
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout capa with submodules
|
- name: Checkout capa with submodules
|
||||||
# do only run if BN_SERIAL is available, have to do this in every step, see https://github.com/orgs/community/discussions/26726#discussioncomment-3253118
|
# do only run if BN_SERIAL is available, have to do this in every step, see https://github.com/orgs/community/discussions/26726#discussioncomment-3253118
|
||||||
@@ -173,7 +173,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ["3.8", "3.11"]
|
python-version: ["3.10", "3.11"]
|
||||||
java-version: ["17"]
|
java-version: ["17"]
|
||||||
ghidra-version: ["11.0.1"]
|
ghidra-version: ["11.0.1"]
|
||||||
public-version: ["PUBLIC_20240130"] # for ghidra releases
|
public-version: ["PUBLIC_20240130"] # for ghidra releases
|
||||||
|
|||||||
103
.github/workflows/web-release.yml
vendored
Normal file
103
.github/workflows/web-release.yml
vendored
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
name: create web release
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
version:
|
||||||
|
description: 'Version number for the release (x.x.x)'
|
||||||
|
required: true
|
||||||
|
type: string
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
run-tests:
|
||||||
|
uses: ./.github/workflows/web-tests.yml
|
||||||
|
|
||||||
|
build-and-release:
|
||||||
|
needs: run-tests
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set release name
|
||||||
|
run: echo "RELEASE_NAME=capa-explorer-web-v${{ github.event.inputs.version }}-${GITHUB_SHA::7}" >> $GITHUB_ENV
|
||||||
|
|
||||||
|
- name: Check if release already exists
|
||||||
|
run: |
|
||||||
|
if ls web/explorer/releases/capa-explorer-web-v${{ github.event.inputs.version }}-* 1> /dev/null 2>&1; then
|
||||||
|
echo "::error:: A release with version ${{ github.event.inputs.version }} already exists"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Set up Node.js
|
||||||
|
uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4
|
||||||
|
with:
|
||||||
|
node-version: 20
|
||||||
|
cache: 'npm'
|
||||||
|
cache-dependency-path: 'web/explorer/package-lock.json'
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: npm ci
|
||||||
|
working-directory: web/explorer
|
||||||
|
|
||||||
|
- name: Build offline bundle
|
||||||
|
run: npm run build:bundle
|
||||||
|
working-directory: web/explorer
|
||||||
|
|
||||||
|
- name: Compress bundle
|
||||||
|
run: zip -r ${{ env.RELEASE_NAME }}.zip capa-explorer-web
|
||||||
|
working-directory: web/explorer
|
||||||
|
|
||||||
|
- name: Create releases directory
|
||||||
|
run: mkdir -vp web/explorer/releases
|
||||||
|
|
||||||
|
- name: Move release to releases folder
|
||||||
|
run: mv web/explorer/${{ env.RELEASE_NAME }}.zip web/explorer/releases
|
||||||
|
|
||||||
|
- name: Compute release SHA256 hash
|
||||||
|
run: |
|
||||||
|
echo "RELEASE_SHA256=$(sha256sum web/explorer/releases/${{ env.RELEASE_NAME }}.zip | awk '{print $1}')" >> $GITHUB_ENV
|
||||||
|
|
||||||
|
- name: Update CHANGELOG.md
|
||||||
|
run: |
|
||||||
|
echo "## ${{ env.RELEASE_NAME }}" >> web/explorer/releases/CHANGELOG.md
|
||||||
|
echo "- Release Date: $(date -u '+%Y-%m-%d %H:%M:%S UTC')" >> web/explorer/releases/CHANGELOG.md
|
||||||
|
echo "- SHA256: ${{ env.RELEASE_SHA256 }}" >> web/explorer/releases/CHANGELOG.md
|
||||||
|
echo "" >> web/explorer/releases/CHANGELOG.md
|
||||||
|
cat web/explorer/releases/CHANGELOG.md
|
||||||
|
|
||||||
|
- name: Remove older releases
|
||||||
|
# keep only the latest 3 releases
|
||||||
|
run: ls -t capa-explorer-web-v*.zip | tail -n +4 | xargs -r rm --
|
||||||
|
working-directory: web/explorer/releases
|
||||||
|
|
||||||
|
- name: Stage release files
|
||||||
|
run: |
|
||||||
|
git config --local user.email "capa-dev@mandiant.com"
|
||||||
|
git config --local user.name "Capa Bot"
|
||||||
|
git add -f web/explorer/releases/${{ env.RELEASE_NAME }}.zip web/explorer/releases/CHANGELOG.md
|
||||||
|
git add -u web/explorer/releases/
|
||||||
|
|
||||||
|
- name: Create Pull Request
|
||||||
|
uses: peter-evans/create-pull-request@5e914681df9dc83aa4e4905692ca88beb2f9e91f # v7.0.5
|
||||||
|
with:
|
||||||
|
token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
title: "explorer web: add release v${{ github.event.inputs.version }}"
|
||||||
|
body: |
|
||||||
|
This PR adds a new capa Explorer Web release v${{ github.event.inputs.version }}.
|
||||||
|
|
||||||
|
Release details:
|
||||||
|
- Name: ${{ env.RELEASE_NAME }}
|
||||||
|
- SHA256: ${{ env.RELEASE_SHA256 }}
|
||||||
|
|
||||||
|
This release is generated by the [web release](https://github.com/mandiant/capa/actions/workflows/web-release.yml) workflow.
|
||||||
|
|
||||||
|
- [x] No CHANGELOG update needed
|
||||||
|
- [x] No new tests needed
|
||||||
|
- [x] No documentation update needed
|
||||||
|
commit-message: ":robot: explorer web: add release ${{ env.RELEASE_NAME }}"
|
||||||
|
branch: release/web-v${{ github.event.inputs.version }}
|
||||||
|
add-paths: web/explorer/releases/${{ env.RELEASE_NAME }}.zip
|
||||||
|
base: master
|
||||||
|
labels: webui
|
||||||
|
delete-branch: true
|
||||||
|
committer: Capa Bot <capa-dev@mandiant.com>
|
||||||
|
author: Capa Bot <capa-dev@mandiant.com>
|
||||||
13
.github/workflows/web-tests.yml
vendored
13
.github/workflows/web-tests.yml
vendored
@@ -1,10 +1,11 @@
|
|||||||
name: Capa Explorer Web tests
|
name: capa Explorer Web tests
|
||||||
|
|
||||||
on:
|
on:
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [ master ]
|
branches: [ master ]
|
||||||
paths:
|
paths:
|
||||||
- 'web/explorer/**'
|
- 'web/explorer/**'
|
||||||
|
workflow_call: # this allows the workflow to be called by other workflows
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
test:
|
test:
|
||||||
@@ -23,20 +24,20 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
node-version: 20
|
node-version: 20
|
||||||
cache: 'npm'
|
cache: 'npm'
|
||||||
cache-dependency-path: './web/explorer/package-lock.json'
|
cache-dependency-path: 'web/explorer/package-lock.json'
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: npm ci
|
run: npm ci
|
||||||
working-directory: ./web/explorer
|
working-directory: web/explorer
|
||||||
|
|
||||||
- name: Lint
|
- name: Lint
|
||||||
run: npm run lint
|
run: npm run lint
|
||||||
working-directory: ./web/explorer
|
working-directory: web/explorer
|
||||||
|
|
||||||
- name: Format
|
- name: Format
|
||||||
run: npm run format:check
|
run: npm run format:check
|
||||||
working-directory: ./web/explorer
|
working-directory: web/explorer
|
||||||
|
|
||||||
- name: Run unit tests
|
- name: Run unit tests
|
||||||
run: npm run test
|
run: npm run test
|
||||||
working-directory: ./web/explorer
|
working-directory: web/explorer
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: isort
|
- id: isort
|
||||||
name: isort
|
name: isort
|
||||||
stages: [commit, push, manual]
|
stages: [pre-commit, pre-push, manual]
|
||||||
language: system
|
language: system
|
||||||
entry: isort
|
entry: isort
|
||||||
args:
|
args:
|
||||||
@@ -46,7 +46,7 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: black
|
- id: black
|
||||||
name: black
|
name: black
|
||||||
stages: [commit, push, manual]
|
stages: [pre-commit, pre-push, manual]
|
||||||
language: system
|
language: system
|
||||||
entry: black
|
entry: black
|
||||||
args:
|
args:
|
||||||
@@ -64,7 +64,7 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: ruff
|
- id: ruff
|
||||||
name: ruff
|
name: ruff
|
||||||
stages: [commit, push, manual]
|
stages: [pre-commit, pre-push, manual]
|
||||||
language: system
|
language: system
|
||||||
entry: ruff
|
entry: ruff
|
||||||
args:
|
args:
|
||||||
@@ -82,7 +82,7 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: flake8
|
- id: flake8
|
||||||
name: flake8
|
name: flake8
|
||||||
stages: [push, manual]
|
stages: [pre-push, manual]
|
||||||
language: system
|
language: system
|
||||||
entry: flake8
|
entry: flake8
|
||||||
args:
|
args:
|
||||||
@@ -101,13 +101,14 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: mypy
|
- id: mypy
|
||||||
name: mypy
|
name: mypy
|
||||||
stages: [push, manual]
|
stages: [pre-push, manual]
|
||||||
language: system
|
language: system
|
||||||
entry: mypy
|
entry: mypy
|
||||||
args:
|
args:
|
||||||
- "--check-untyped-defs"
|
- "--check-untyped-defs"
|
||||||
- "--ignore-missing-imports"
|
- "--ignore-missing-imports"
|
||||||
- "--config-file=.github/mypy/mypy.ini"
|
- "--config-file=.github/mypy/mypy.ini"
|
||||||
|
- "--enable-incomplete-feature=NewGenericSyntax"
|
||||||
- "capa/"
|
- "capa/"
|
||||||
- "scripts/"
|
- "scripts/"
|
||||||
- "tests/"
|
- "tests/"
|
||||||
@@ -119,7 +120,7 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: deptry
|
- id: deptry
|
||||||
name: deptry
|
name: deptry
|
||||||
stages: [push, manual]
|
stages: [pre-push, manual]
|
||||||
language: system
|
language: system
|
||||||
entry: deptry .
|
entry: deptry .
|
||||||
always_run: true
|
always_run: true
|
||||||
|
|||||||
130
CHANGELOG.md
130
CHANGELOG.md
@@ -12,8 +12,9 @@
|
|||||||
|
|
||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
|
|
||||||
- extractor: fix exception when PE extractor encounters unknown architecture #2440 @Tamir-K
|
- vmray: load more analysis archives @mr-tz
|
||||||
- IDA Pro: rename ida to idapro module for plugin and idalib in IDA 9.0 #2453 @mr-tz
|
- dynamic: only check file limitations for static file formats @mr-tz
|
||||||
|
- vmray: skip non-printable strings @mike-hunhoff
|
||||||
|
|
||||||
### capa Explorer Web
|
### capa Explorer Web
|
||||||
|
|
||||||
@@ -22,8 +23,127 @@
|
|||||||
### Development
|
### Development
|
||||||
|
|
||||||
### Raw diffs
|
### Raw diffs
|
||||||
- [capa v7.4.0...master](https://github.com/mandiant/capa/compare/v7.4.0...master)
|
- [capa v8.0.1...master](https://github.com/mandiant/capa/compare/v8.0.1...master)
|
||||||
- [capa-rules v7.4.0...master](https://github.com/mandiant/capa-rules/compare/v7.4.0...master)
|
- [capa-rules v8.0.1...master](https://github.com/mandiant/capa-rules/compare/v8.0.1...master)
|
||||||
|
|
||||||
|
## v8.0.1
|
||||||
|
|
||||||
|
This point release fixes an issue with the IDAPython API to now handle IDA Pro 8.3, 8.4, and 9.0 correctly.
|
||||||
|
|
||||||
|
### Bug Fixes
|
||||||
|
|
||||||
|
- handle IDA 8.3/8.4 vs. 9.0 API change @mr-tz
|
||||||
|
|
||||||
|
### Raw diffs
|
||||||
|
- [capa v8.0.0...v8.0.1](https://github.com/mandiant/capa/compare/v8.0.0...v8.0.1)
|
||||||
|
- [capa-rules v8.0.0...v8.0.1](https://github.com/mandiant/capa-rules/compare/v8.0.0...v8.0.1)
|
||||||
|
|
||||||
|
## v8.0.0
|
||||||
|
|
||||||
|
capa version 8 adds support for IDA Pro 9.0 (and idalib). The release comes with various improvements and bug fixes for the Binary Ninja backend (including to load with database files) -- thanks to @xusheng6.
|
||||||
|
|
||||||
|
Additional bug fixes improve the dynamic and BinExport backends.
|
||||||
|
|
||||||
|
capa version 8 now requires Python 3.10 or newer.
|
||||||
|
|
||||||
|
Special thanks to @Tamir-K, @harshit-wadhwani, @jorik-utwente for their great contributions.
|
||||||
|
|
||||||
|
### New Features
|
||||||
|
|
||||||
|
- allow call as valid subscope for call scoped rules @mr-tz
|
||||||
|
- support loading and analyzing a Binary Ninja database #2496 @xusheng6
|
||||||
|
- vmray: record process command line details @mr-tz
|
||||||
|
|
||||||
|
### Breaking Changes
|
||||||
|
|
||||||
|
- remove support for Python 3.8 and use Python 3.10 as minimum now #1966 @mr-tz
|
||||||
|
|
||||||
|
### New Rules (54)
|
||||||
|
|
||||||
|
- nursery/get-shadow-password-file-entry-on-linux jonathanlepore@google.com
|
||||||
|
- nursery/set-shadow-password-file-entry-on-linux jonathanlepore@google.com
|
||||||
|
- collection/browser/get-chrome-cookiemonster still@teamt5.org
|
||||||
|
- collection/browser/get-elevation-service-for-chromium-based-browsers still@teamt5.org
|
||||||
|
- collection/get-steam-token still@teamt5.org
|
||||||
|
- nursery/persist-via-application-shimming j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-bits-job j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-print-processors-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- linking/static/touchsocket/linked-against-touchsocket still@teamt5.org
|
||||||
|
- runtime/dotnet/compiled-with-dotnet-aot still@teamt5.org
|
||||||
|
- nursery/persist-via-errorhandler-script j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-get-variable-hijack j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-iphlpapi-dll-hijack j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-lnk-shortcut j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-powershell-profile j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-windows-accessibility-tools j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-windows-terminal-profile j.j.vannielen@utwente.nl
|
||||||
|
- nursery/write-to-browser-extension-directory j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-aedebug-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-amsi-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-app-paths-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-appcertdlls-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-appx-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-autodialdll-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-autoplayhandlers-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-bootverificationprogram-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-code-signing-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-com-hijack j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-command-processor-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-contextmenuhandlers-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-cor_profiler_path-registry-value j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-default-file-association-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-disk-cleanup-handler-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-dotnet-dbgmanageddebugger-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-dotnet_startup_hooks-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-explorer-tools-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-filter-handlers-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-group-policy-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-hhctrl-com-hijack j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-htmlhelp-author-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-image-file-execution-options-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-lsa-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-natural-language-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-netsh-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-network-provider-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-path-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-print-monitors-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-rdp-startup-programs-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-silentprocessexit-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-telemetrycontroller-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-timeproviders-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-ts-initialprogram-registry-key j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-userinitmprlogonscript-registry-value j.j.vannielen@utwente.nl
|
||||||
|
- nursery/persist-via-windows-error-reporting-registry-key j.j.vannielen@utwente.nl
|
||||||
|
|
||||||
|
### Bug Fixes
|
||||||
|
|
||||||
|
- extractor: fix exception when PE extractor encounters unknown architecture #2440 @Tamir-K
|
||||||
|
- IDA Pro: rename ida to idapro module for plugin and idalib in IDA 9.0 #2453 @mr-tz
|
||||||
|
- ghidra: fix saving of base address @mr-tz
|
||||||
|
- binja: support loading raw x86/x86_64 shellcode #2489 @xusheng6
|
||||||
|
- binja: fix crash when the IL of certain functions are not available. #2249 @xusheng6
|
||||||
|
- binja: major performance improvement on the binja extractor. #1414 @xusheng6
|
||||||
|
- cape: make Process model flexible and procmemory optional to load newest reports #2466 @mr-tz
|
||||||
|
- binja: fix unit test failure by fixing up the analysis for file al-khaser_x64.exe_ #2507 @xusheng6
|
||||||
|
- binja: move the stack string detection to function level #2516 @xusheng6
|
||||||
|
- BinExport2: fix handling of incorrect thunk functions #2524 @williballenthin
|
||||||
|
- BinExport2: more precise pruning of expressions @williballenthin
|
||||||
|
- BinExport2: better handle weird expression trees from Ghidra #2528 #2530 @williballenthin
|
||||||
|
|
||||||
|
### capa Explorer Web
|
||||||
|
|
||||||
|
### capa Explorer IDA Pro plugin
|
||||||
|
|
||||||
|
- fix bug preventing saving of capa results via Save button @mr-tz
|
||||||
|
- fix saving of base address @mr-tz
|
||||||
|
|
||||||
|
### Development
|
||||||
|
- CI: use macos-13 since macos-12 is deprecated and will be removed on December 3rd, 2024 #2173 @mr-tz
|
||||||
|
- CI: update Binary Ninja version to 4.2 #2499 @xusheng6
|
||||||
|
|
||||||
|
### Raw diffs
|
||||||
|
- [capa v7.4.0...v8.0.0](https://github.com/mandiant/capa/compare/v7.4.0...v8.0.0)
|
||||||
|
- [capa-rules v7.4.0...v8.0.0](https://github.com/mandiant/capa-rules/compare/v7.4.0...v8.0.0)
|
||||||
|
|
||||||
## v7.4.0
|
## v7.4.0
|
||||||
|
|
||||||
@@ -179,6 +299,8 @@ Special thanks to our repeat and new contributors:
|
|||||||
- CI: update Binary Ninja version to 4.1 and use Python 3.9 to test it #2211 @xusheng6
|
- CI: update Binary Ninja version to 4.1 and use Python 3.9 to test it #2211 @xusheng6
|
||||||
- CI: update tests.yml workflow to exclude web and documentation files #2263 @s-ff
|
- CI: update tests.yml workflow to exclude web and documentation files #2263 @s-ff
|
||||||
- CI: update build.yml workflow to exclude web and documentation files #2270 @s-ff
|
- CI: update build.yml workflow to exclude web and documentation files #2270 @s-ff
|
||||||
|
- CI: add web releases workflow #2455 @s-ff
|
||||||
|
- CI: skip changelog.yml for dependabot PRs #2471
|
||||||
|
|
||||||
### Raw diffs
|
### Raw diffs
|
||||||
|
|
||||||
|
|||||||
82
README.md
82
README.md
@@ -38,49 +38,47 @@ Below you find a list of [our capa blog posts with more details.](#blog-posts)
|
|||||||
```
|
```
|
||||||
$ capa.exe suspicious.exe
|
$ capa.exe suspicious.exe
|
||||||
|
|
||||||
+------------------------+--------------------------------------------------------------------------------+
|
+--------------------+------------------------------------------------------------------------+
|
||||||
| ATT&CK Tactic | ATT&CK Technique |
|
| ATT&CK Tactic | ATT&CK Technique |
|
||||||
|------------------------+--------------------------------------------------------------------------------|
|
|--------------------+------------------------------------------------------------------------|
|
||||||
| DEFENSE EVASION | Obfuscated Files or Information [T1027] |
|
| DEFENSE EVASION | Obfuscated Files or Information [T1027] |
|
||||||
| DISCOVERY | Query Registry [T1012] |
|
| DISCOVERY | Query Registry [T1012] |
|
||||||
| | System Information Discovery [T1082] |
|
| | System Information Discovery [T1082] |
|
||||||
| EXECUTION | Command and Scripting Interpreter::Windows Command Shell [T1059.003] |
|
| EXECUTION | Command and Scripting Interpreter::Windows Command Shell [T1059.003] |
|
||||||
| | Shared Modules [T1129] |
|
| | Shared Modules [T1129] |
|
||||||
| EXFILTRATION | Exfiltration Over C2 Channel [T1041] |
|
| EXFILTRATION | Exfiltration Over C2 Channel [T1041] |
|
||||||
| PERSISTENCE | Create or Modify System Process::Windows Service [T1543.003] |
|
| PERSISTENCE | Create or Modify System Process::Windows Service [T1543.003] |
|
||||||
+------------------------+--------------------------------------------------------------------------------+
|
+--------------------+------------------------------------------------------------------------+
|
||||||
|
|
||||||
+-------------------------------------------------------+-------------------------------------------------+
|
+-------------------------------------------+-------------------------------------------------+
|
||||||
| CAPABILITY | NAMESPACE |
|
| CAPABILITY | NAMESPACE |
|
||||||
|-------------------------------------------------------+-------------------------------------------------|
|
|-------------------------------------------+-------------------------------------------------|
|
||||||
| check for OutputDebugString error | anti-analysis/anti-debugging/debugger-detection |
|
| read and send data from client to server | c2/file-transfer |
|
||||||
| read and send data from client to server | c2/file-transfer |
|
| execute shell command and capture output | c2/shell |
|
||||||
| execute shell command and capture output | c2/shell |
|
| receive data (2 matches) | communication |
|
||||||
| receive data (2 matches) | communication |
|
| send data (6 matches) | communication |
|
||||||
| send data (6 matches) | communication |
|
| connect to HTTP server (3 matches) | communication/http/client |
|
||||||
| connect to HTTP server (3 matches) | communication/http/client |
|
| send HTTP request (3 matches) | communication/http/client |
|
||||||
| send HTTP request (3 matches) | communication/http/client |
|
| create pipe | communication/named-pipe/create |
|
||||||
| create pipe | communication/named-pipe/create |
|
| get socket status (2 matches) | communication/socket |
|
||||||
| get socket status (2 matches) | communication/socket |
|
| receive data on socket (2 matches) | communication/socket/receive |
|
||||||
| receive data on socket (2 matches) | communication/socket/receive |
|
| send data on socket (3 matches) | communication/socket/send |
|
||||||
| send data on socket (3 matches) | communication/socket/send |
|
| connect TCP socket | communication/socket/tcp |
|
||||||
| connect TCP socket | communication/socket/tcp |
|
| encode data using Base64 | data-manipulation/encoding/base64 |
|
||||||
| encode data using Base64 | data-manipulation/encoding/base64 |
|
| encode data using XOR (6 matches) | data-manipulation/encoding/xor |
|
||||||
| encode data using XOR (6 matches) | data-manipulation/encoding/xor |
|
| run as a service | executable/pe |
|
||||||
| run as a service | executable/pe |
|
| get common file path (3 matches) | host-interaction/file-system |
|
||||||
| get common file path (3 matches) | host-interaction/file-system |
|
| read file | host-interaction/file-system/read |
|
||||||
| read file | host-interaction/file-system/read |
|
| write file (2 matches) | host-interaction/file-system/write |
|
||||||
| write file (2 matches) | host-interaction/file-system/write |
|
| print debug messages (2 matches) | host-interaction/log/debug/write-event |
|
||||||
| print debug messages (2 matches) | host-interaction/log/debug/write-event |
|
| resolve DNS | host-interaction/network/dns/resolve |
|
||||||
| resolve DNS | host-interaction/network/dns/resolve |
|
| get hostname | host-interaction/os/hostname |
|
||||||
| get hostname | host-interaction/os/hostname |
|
| create process | host-interaction/process/create |
|
||||||
| create a process with modified I/O handles and window | host-interaction/process/create |
|
| create registry key | host-interaction/registry/create |
|
||||||
| create process | host-interaction/process/create |
|
| create service | host-interaction/service/create |
|
||||||
| create registry key | host-interaction/registry/create |
|
| create thread | host-interaction/thread/create |
|
||||||
| create service | host-interaction/service/create |
|
| persist via Windows service | persistence/service |
|
||||||
| create thread | host-interaction/thread/create |
|
+-------------------------------------------+-------------------------------------------------+
|
||||||
| persist via Windows service | persistence/service |
|
|
||||||
+-------------------------------------------------------+-------------------------------------------------+
|
|
||||||
```
|
```
|
||||||
|
|
||||||
# download and usage
|
# download and usage
|
||||||
|
|||||||
0
capa/analysis/__init__.py
Normal file
0
capa/analysis/__init__.py
Normal file
38
capa/analysis/flirt.py
Normal file
38
capa/analysis/flirt.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||||
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
import capa.features.extractors.ida.idalib as idalib
|
||||||
|
|
||||||
|
if not idalib.has_idalib():
|
||||||
|
raise RuntimeError("cannot find IDA idalib module.")
|
||||||
|
|
||||||
|
if not idalib.load_idalib():
|
||||||
|
raise RuntimeError("failed to load IDA idalib module.")
|
||||||
|
|
||||||
|
import idaapi
|
||||||
|
import idautils
|
||||||
|
|
||||||
|
|
||||||
|
class FunctionId(BaseModel):
|
||||||
|
va: int
|
||||||
|
is_library: bool
|
||||||
|
name: str
|
||||||
|
|
||||||
|
|
||||||
|
def get_flirt_matches(lib_only=True):
|
||||||
|
for fva in idautils.Functions():
|
||||||
|
f = idaapi.get_func(fva)
|
||||||
|
is_lib = bool(f.flags & idaapi.FUNC_LIB)
|
||||||
|
fname = idaapi.get_func_name(fva)
|
||||||
|
|
||||||
|
if lib_only and not is_lib:
|
||||||
|
continue
|
||||||
|
|
||||||
|
yield FunctionId(va=fva, is_library=is_lib, name=fname)
|
||||||
242
capa/analysis/libraries.py
Normal file
242
capa/analysis/libraries.py
Normal file
@@ -0,0 +1,242 @@
|
|||||||
|
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||||
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
import io
|
||||||
|
import sys
|
||||||
|
import logging
|
||||||
|
import argparse
|
||||||
|
import tempfile
|
||||||
|
import contextlib
|
||||||
|
from enum import Enum
|
||||||
|
from typing import List, Optional
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import rich
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from rich.text import Text
|
||||||
|
from rich.console import Console
|
||||||
|
|
||||||
|
import capa.main
|
||||||
|
import capa.helpers
|
||||||
|
import capa.analysis.flirt
|
||||||
|
import capa.analysis.strings
|
||||||
|
import capa.features.extractors.ida.idalib as idalib
|
||||||
|
|
||||||
|
if not idalib.has_idalib():
|
||||||
|
raise RuntimeError("cannot find IDA idalib module.")
|
||||||
|
|
||||||
|
if not idalib.load_idalib():
|
||||||
|
raise RuntimeError("failed to load IDA idalib module.")
|
||||||
|
|
||||||
|
import idaapi
|
||||||
|
import idapro
|
||||||
|
import ida_auto
|
||||||
|
import idautils
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class Classification(str, Enum):
|
||||||
|
USER = "user"
|
||||||
|
LIBRARY = "library"
|
||||||
|
UNKNOWN = "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
class Method(str, Enum):
|
||||||
|
FLIRT = "flirt"
|
||||||
|
STRINGS = "strings"
|
||||||
|
THUNK = "thunk"
|
||||||
|
ENTRYPOINT = "entrypoint"
|
||||||
|
|
||||||
|
|
||||||
|
class FunctionClassification(BaseModel):
|
||||||
|
va: int
|
||||||
|
classification: Classification
|
||||||
|
# name per the disassembler/analysis tool
|
||||||
|
# may be combined with the recovered/suspected name TODO below
|
||||||
|
name: str
|
||||||
|
|
||||||
|
# if is library, this must be provided
|
||||||
|
method: Optional[Method]
|
||||||
|
|
||||||
|
# TODO if is library, recovered/suspected name?
|
||||||
|
|
||||||
|
# if is library, these can optionally be provided.
|
||||||
|
library_name: Optional[str] = None
|
||||||
|
library_version: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class FunctionIdResults(BaseModel):
|
||||||
|
function_classifications: List[FunctionClassification]
|
||||||
|
|
||||||
|
|
||||||
|
@contextlib.contextmanager
|
||||||
|
def ida_session(input_path: Path, use_temp_dir=True):
|
||||||
|
if use_temp_dir:
|
||||||
|
t = Path(tempfile.mkdtemp(prefix="ida-")) / input_path.name
|
||||||
|
else:
|
||||||
|
t = input_path
|
||||||
|
|
||||||
|
logger.debug("using %s", str(t))
|
||||||
|
# stderr=True is used here to redirect the spinner banner to stderr,
|
||||||
|
# so that users can redirect capa's output.
|
||||||
|
console = Console(stderr=True, quiet=False)
|
||||||
|
|
||||||
|
try:
|
||||||
|
if use_temp_dir:
|
||||||
|
t.write_bytes(input_path.read_bytes())
|
||||||
|
|
||||||
|
# idalib writes to stdout (ugh), so we have to capture that
|
||||||
|
# so as not to screw up structured output.
|
||||||
|
with capa.helpers.stdout_redirector(io.BytesIO()):
|
||||||
|
idapro.enable_console_messages(False)
|
||||||
|
with capa.main.timing("analyze program"):
|
||||||
|
with console.status("analyzing program...", spinner="dots"):
|
||||||
|
if idapro.open_database(str(t.absolute()), run_auto_analysis=True):
|
||||||
|
raise RuntimeError("failed to analyze input file")
|
||||||
|
|
||||||
|
logger.debug("idalib: waiting for analysis...")
|
||||||
|
ida_auto.auto_wait()
|
||||||
|
logger.debug("idalib: opened database.")
|
||||||
|
|
||||||
|
yield
|
||||||
|
finally:
|
||||||
|
idapro.close_database()
|
||||||
|
if use_temp_dir:
|
||||||
|
t.unlink()
|
||||||
|
|
||||||
|
|
||||||
|
def is_thunk_function(fva):
|
||||||
|
f = idaapi.get_func(fva)
|
||||||
|
return bool(f.flags & idaapi.FUNC_THUNK)
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv=None):
|
||||||
|
if argv is None:
|
||||||
|
argv = sys.argv[1:]
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser(description="Identify library functions using various strategies.")
|
||||||
|
capa.main.install_common_args(parser, wanted={"input_file"})
|
||||||
|
parser.add_argument("--store-idb", action="store_true", default=False, help="store IDA database file")
|
||||||
|
parser.add_argument("--min-string-length", type=int, default=8, help="minimum string length")
|
||||||
|
parser.add_argument("-j", "--json", action="store_true", help="emit JSON instead of text")
|
||||||
|
args = parser.parse_args(args=argv)
|
||||||
|
|
||||||
|
try:
|
||||||
|
capa.main.handle_common_args(args)
|
||||||
|
except capa.main.ShouldExitError as e:
|
||||||
|
return e.status_code
|
||||||
|
|
||||||
|
dbs = capa.analysis.strings.get_default_databases()
|
||||||
|
capa.analysis.strings.prune_databases(dbs, n=args.min_string_length)
|
||||||
|
|
||||||
|
function_classifications: List[FunctionClassification] = []
|
||||||
|
with ida_session(args.input_file, use_temp_dir=not args.store_idb):
|
||||||
|
with capa.main.timing("FLIRT-based library identification"):
|
||||||
|
# TODO: add more signature (files)
|
||||||
|
# TOOD: apply more signatures
|
||||||
|
for flirt_match in capa.analysis.flirt.get_flirt_matches():
|
||||||
|
function_classifications.append(
|
||||||
|
FunctionClassification(
|
||||||
|
va=flirt_match.va,
|
||||||
|
name=flirt_match.name,
|
||||||
|
classification=Classification.LIBRARY,
|
||||||
|
method=Method.FLIRT,
|
||||||
|
# note: we cannot currently include which signature matched per function via the IDA API
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# thunks
|
||||||
|
for fva in idautils.Functions():
|
||||||
|
if is_thunk_function(fva):
|
||||||
|
function_classifications.append(
|
||||||
|
FunctionClassification(
|
||||||
|
va=fva,
|
||||||
|
name=idaapi.get_func_name(fva),
|
||||||
|
classification=Classification.LIBRARY,
|
||||||
|
method=Method.THUNK,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
with capa.main.timing("string-based library identification"):
|
||||||
|
for string_match in capa.analysis.strings.get_string_matches(dbs):
|
||||||
|
function_classifications.append(
|
||||||
|
FunctionClassification(
|
||||||
|
va=string_match.va,
|
||||||
|
name=idaapi.get_func_name(string_match.va),
|
||||||
|
classification=Classification.LIBRARY,
|
||||||
|
method=Method.STRINGS,
|
||||||
|
library_name=string_match.metadata.library_name,
|
||||||
|
library_version=string_match.metadata.library_version,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
for va in idautils.Functions():
|
||||||
|
name = idaapi.get_func_name(va)
|
||||||
|
if name not in {
|
||||||
|
"WinMain",
|
||||||
|
}:
|
||||||
|
continue
|
||||||
|
|
||||||
|
function_classifications.append(
|
||||||
|
FunctionClassification(
|
||||||
|
va=va,
|
||||||
|
name=name,
|
||||||
|
classification=Classification.USER,
|
||||||
|
method=Method.ENTRYPOINT,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
doc = FunctionIdResults(function_classifications=[])
|
||||||
|
classifications_by_va = capa.analysis.strings.create_index(function_classifications, "va")
|
||||||
|
for va in idautils.Functions():
|
||||||
|
if classifications := classifications_by_va.get(va):
|
||||||
|
doc.function_classifications.extend(classifications)
|
||||||
|
else:
|
||||||
|
doc.function_classifications.append(
|
||||||
|
FunctionClassification(
|
||||||
|
va=va,
|
||||||
|
name=idaapi.get_func_name(va),
|
||||||
|
classification=Classification.UNKNOWN,
|
||||||
|
method=None,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
if args.json:
|
||||||
|
print(doc.model_dump_json()) # noqa: T201 print found
|
||||||
|
|
||||||
|
else:
|
||||||
|
table = rich.table.Table()
|
||||||
|
table.add_column("FVA")
|
||||||
|
table.add_column("CLASSIFICATION")
|
||||||
|
table.add_column("METHOD")
|
||||||
|
table.add_column("FNAME")
|
||||||
|
table.add_column("EXTRA INFO")
|
||||||
|
|
||||||
|
classifications_by_va = capa.analysis.strings.create_index(doc.function_classifications, "va", sorted_=True)
|
||||||
|
for va, classifications in classifications_by_va.items():
|
||||||
|
name = ", ".join({c.name for c in classifications})
|
||||||
|
if "sub_" in name:
|
||||||
|
name = Text(name, style="grey53")
|
||||||
|
|
||||||
|
classification = {c.classification for c in classifications}
|
||||||
|
method = {c.method for c in classifications if c.method}
|
||||||
|
extra = {f"{c.library_name}@{c.library_version}" for c in classifications if c.library_name}
|
||||||
|
|
||||||
|
table.add_row(
|
||||||
|
hex(va),
|
||||||
|
", ".join(classification) if classification != {"unknown"} else Text("unknown", style="grey53"),
|
||||||
|
", ".join(method),
|
||||||
|
name,
|
||||||
|
", ".join(extra),
|
||||||
|
)
|
||||||
|
|
||||||
|
rich.print(table)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(main())
|
||||||
2
capa/analysis/requirements.txt
Normal file
2
capa/analysis/requirements.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# temporary extra file to track dependencies of the analysis directory
|
||||||
|
nltk==3.9.1
|
||||||
269
capa/analysis/strings/__init__.py
Normal file
269
capa/analysis/strings/__init__.py
Normal file
@@ -0,0 +1,269 @@
|
|||||||
|
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||||
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
further requirements:
|
||||||
|
- nltk
|
||||||
|
"""
|
||||||
|
import gzip
|
||||||
|
import logging
|
||||||
|
import collections
|
||||||
|
from typing import Any, Dict, Mapping
|
||||||
|
from pathlib import Path
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
import msgspec
|
||||||
|
|
||||||
|
import capa.features.extractors.strings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class LibraryString(msgspec.Struct):
|
||||||
|
string: str
|
||||||
|
library_name: str
|
||||||
|
library_version: str
|
||||||
|
file_path: str | None = None
|
||||||
|
function_name: str | None = None
|
||||||
|
line_number: int | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class LibraryStringDatabase:
|
||||||
|
metadata_by_string: Dict[str, LibraryString]
|
||||||
|
|
||||||
|
def __len__(self) -> int:
|
||||||
|
return len(self.metadata_by_string)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_file(cls, path: Path) -> "LibraryStringDatabase":
|
||||||
|
metadata_by_string: Dict[str, LibraryString] = {}
|
||||||
|
decoder = msgspec.json.Decoder(type=LibraryString)
|
||||||
|
for line in gzip.decompress(path.read_bytes()).split(b"\n"):
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
s = decoder.decode(line)
|
||||||
|
metadata_by_string[s.string] = s
|
||||||
|
|
||||||
|
return cls(metadata_by_string=metadata_by_string)
|
||||||
|
|
||||||
|
|
||||||
|
DEFAULT_FILENAMES = (
|
||||||
|
"brotli.jsonl.gz",
|
||||||
|
"bzip2.jsonl.gz",
|
||||||
|
"cryptopp.jsonl.gz",
|
||||||
|
"curl.jsonl.gz",
|
||||||
|
"detours.jsonl.gz",
|
||||||
|
"jemalloc.jsonl.gz",
|
||||||
|
"jsoncpp.jsonl.gz",
|
||||||
|
"kcp.jsonl.gz",
|
||||||
|
"liblzma.jsonl.gz",
|
||||||
|
"libsodium.jsonl.gz",
|
||||||
|
"libpcap.jsonl.gz",
|
||||||
|
"mbedtls.jsonl.gz",
|
||||||
|
"openssl.jsonl.gz",
|
||||||
|
"sqlite3.jsonl.gz",
|
||||||
|
"tomcrypt.jsonl.gz",
|
||||||
|
"wolfssl.jsonl.gz",
|
||||||
|
"zlib.jsonl.gz",
|
||||||
|
)
|
||||||
|
|
||||||
|
DEFAULT_PATHS = tuple(Path(__file__).parent / "data" / "oss" / filename for filename in DEFAULT_FILENAMES) + (
|
||||||
|
Path(__file__).parent / "data" / "crt" / "msvc_v143.jsonl.gz",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_default_databases() -> list[LibraryStringDatabase]:
|
||||||
|
return [LibraryStringDatabase.from_file(path) for path in DEFAULT_PATHS]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class WindowsApiStringDatabase:
|
||||||
|
dll_names: set[str]
|
||||||
|
api_names: set[str]
|
||||||
|
|
||||||
|
def __len__(self) -> int:
|
||||||
|
return len(self.dll_names) + len(self.api_names)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_dir(cls, path: Path) -> "WindowsApiStringDatabase":
|
||||||
|
dll_names: set[str] = set()
|
||||||
|
api_names: set[str] = set()
|
||||||
|
|
||||||
|
for line in gzip.decompress((path / "dlls.txt.gz").read_bytes()).decode("utf-8").splitlines():
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
dll_names.add(line)
|
||||||
|
|
||||||
|
for line in gzip.decompress((path / "apis.txt.gz").read_bytes()).decode("utf-8").splitlines():
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
api_names.add(line)
|
||||||
|
|
||||||
|
return cls(dll_names=dll_names, api_names=api_names)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_defaults(cls) -> "WindowsApiStringDatabase":
|
||||||
|
return cls.from_dir(Path(__file__).parent / "data" / "winapi")
|
||||||
|
|
||||||
|
|
||||||
|
def extract_strings(buf, n=4):
|
||||||
|
yield from capa.features.extractors.strings.extract_ascii_strings(buf, n=n)
|
||||||
|
yield from capa.features.extractors.strings.extract_unicode_strings(buf, n=n)
|
||||||
|
|
||||||
|
|
||||||
|
def prune_databases(dbs: list[LibraryStringDatabase], n=8):
|
||||||
|
"""remove less trustyworthy database entries.
|
||||||
|
|
||||||
|
such as:
|
||||||
|
- those found in multiple databases
|
||||||
|
- those that are English words
|
||||||
|
- those that are too short
|
||||||
|
- Windows API and DLL names
|
||||||
|
"""
|
||||||
|
|
||||||
|
# TODO: consider applying these filters directly to the persisted databases, not at load time.
|
||||||
|
|
||||||
|
winapi = WindowsApiStringDatabase.from_defaults()
|
||||||
|
|
||||||
|
try:
|
||||||
|
from nltk.corpus import words as nltk_words
|
||||||
|
|
||||||
|
nltk_words.words()
|
||||||
|
except (ImportError, LookupError):
|
||||||
|
# one-time download of dataset.
|
||||||
|
# this probably doesn't work well for embedded use.
|
||||||
|
import nltk
|
||||||
|
|
||||||
|
nltk.download("words")
|
||||||
|
from nltk.corpus import words as nltk_words
|
||||||
|
words = set(nltk_words.words())
|
||||||
|
|
||||||
|
counter: collections.Counter[str] = collections.Counter()
|
||||||
|
to_remove = set()
|
||||||
|
for db in dbs:
|
||||||
|
for string in db.metadata_by_string.keys():
|
||||||
|
counter[string] += 1
|
||||||
|
|
||||||
|
if string in words:
|
||||||
|
to_remove.add(string)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if len(string) < n:
|
||||||
|
to_remove.add(string)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if string in winapi.api_names:
|
||||||
|
to_remove.add(string)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if string in winapi.dll_names:
|
||||||
|
to_remove.add(string)
|
||||||
|
continue
|
||||||
|
|
||||||
|
for string, count in counter.most_common():
|
||||||
|
if count <= 1:
|
||||||
|
break
|
||||||
|
|
||||||
|
# remove strings that are seen in more than one database
|
||||||
|
to_remove.add(string)
|
||||||
|
|
||||||
|
for db in dbs:
|
||||||
|
for string in to_remove:
|
||||||
|
if string in db.metadata_by_string:
|
||||||
|
del db.metadata_by_string[string]
|
||||||
|
|
||||||
|
|
||||||
|
def get_function_strings():
|
||||||
|
import idaapi
|
||||||
|
import idautils
|
||||||
|
|
||||||
|
import capa.features.extractors.ida.helpers as ida_helpers
|
||||||
|
|
||||||
|
strings_by_function = collections.defaultdict(set)
|
||||||
|
for ea in idautils.Functions():
|
||||||
|
f = idaapi.get_func(ea)
|
||||||
|
|
||||||
|
# ignore library functions and thunk functions as identified by IDA
|
||||||
|
if f.flags & idaapi.FUNC_THUNK:
|
||||||
|
continue
|
||||||
|
if f.flags & idaapi.FUNC_LIB:
|
||||||
|
continue
|
||||||
|
|
||||||
|
for bb in ida_helpers.get_function_blocks(f):
|
||||||
|
for insn in ida_helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
|
||||||
|
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
|
||||||
|
if ref == insn.ea:
|
||||||
|
continue
|
||||||
|
|
||||||
|
string = capa.features.extractors.ida.helpers.find_string_at(ref)
|
||||||
|
if not string:
|
||||||
|
continue
|
||||||
|
|
||||||
|
strings_by_function[ea].add(string)
|
||||||
|
|
||||||
|
return strings_by_function
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class LibraryStringClassification:
|
||||||
|
va: int
|
||||||
|
string: str
|
||||||
|
library_name: str
|
||||||
|
metadata: LibraryString
|
||||||
|
|
||||||
|
|
||||||
|
def create_index(s: list, k: str, sorted_: bool = False) -> Mapping[Any, list]:
|
||||||
|
"""create an index of the elements in `s` using the key `k`, optionally sorted by `k`"""
|
||||||
|
if sorted_:
|
||||||
|
s = sorted(s, key=lambda x: getattr(x, k))
|
||||||
|
|
||||||
|
s_by_k = collections.defaultdict(list)
|
||||||
|
for v in s:
|
||||||
|
p = getattr(v, k)
|
||||||
|
s_by_k[p].append(v)
|
||||||
|
return s_by_k
|
||||||
|
|
||||||
|
|
||||||
|
def get_string_matches(dbs: list[LibraryStringDatabase]) -> list[LibraryStringClassification]:
|
||||||
|
matches: list[LibraryStringClassification] = []
|
||||||
|
|
||||||
|
for function, strings in sorted(get_function_strings().items()):
|
||||||
|
for string in strings:
|
||||||
|
for db in dbs:
|
||||||
|
if metadata := db.metadata_by_string.get(string):
|
||||||
|
matches.append(
|
||||||
|
LibraryStringClassification(
|
||||||
|
va=function,
|
||||||
|
string=string,
|
||||||
|
library_name=metadata.library_name,
|
||||||
|
metadata=metadata,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# if there are less than N strings per library, ignore that library
|
||||||
|
matches_by_library = create_index(matches, "library_name")
|
||||||
|
for library_name, library_matches in matches_by_library.items():
|
||||||
|
if len(library_matches) > 5:
|
||||||
|
continue
|
||||||
|
|
||||||
|
logger.info("pruning library %s: only %d matched string", library_name, len(library_matches))
|
||||||
|
matches = [m for m in matches if m.library_name != library_name]
|
||||||
|
|
||||||
|
# if there are conflicts within a single function, don't label it
|
||||||
|
matches_by_function = create_index(matches, "va")
|
||||||
|
for va, function_matches in matches_by_function.items():
|
||||||
|
library_names = {m.library_name for m in function_matches}
|
||||||
|
if len(library_names) == 1:
|
||||||
|
continue
|
||||||
|
|
||||||
|
logger.info("conflicting matches: 0x%x: %s", va, sorted(library_names))
|
||||||
|
# this is potentially slow (O(n**2)) but hopefully fast enough in practice.
|
||||||
|
matches = [m for m in matches if m.va != va]
|
||||||
|
|
||||||
|
return matches
|
||||||
130
capa/analysis/strings/__main__.py
Normal file
130
capa/analysis/strings/__main__.py
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||||
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
import sys
|
||||||
|
import logging
|
||||||
|
import collections
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import rich
|
||||||
|
from rich.text import Text
|
||||||
|
|
||||||
|
import capa.analysis.strings
|
||||||
|
import capa.features.extractors.strings
|
||||||
|
import capa.features.extractors.ida.helpers as ida_helpers
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def open_ida(input_path: Path):
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
import idapro
|
||||||
|
|
||||||
|
t = Path(tempfile.mkdtemp(prefix="ida-")) / input_path.name
|
||||||
|
t.write_bytes(input_path.read_bytes())
|
||||||
|
# resource leak: we should delete this upon exit
|
||||||
|
|
||||||
|
idapro.enable_console_messages(False)
|
||||||
|
idapro.open_database(str(t.absolute()), run_auto_analysis=True)
|
||||||
|
|
||||||
|
import ida_auto
|
||||||
|
|
||||||
|
ida_auto.auto_wait()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
|
||||||
|
# use n=8 to ignore common words
|
||||||
|
N = 8
|
||||||
|
|
||||||
|
input_path = Path(sys.argv[1])
|
||||||
|
|
||||||
|
dbs = capa.analysis.strings.get_default_databases()
|
||||||
|
capa.analysis.strings.prune_databases(dbs, n=N)
|
||||||
|
|
||||||
|
strings_by_library = collections.defaultdict(set)
|
||||||
|
for string in capa.analysis.strings.extract_strings(input_path.read_bytes(), n=N):
|
||||||
|
for db in dbs:
|
||||||
|
if metadata := db.metadata_by_string.get(string.s):
|
||||||
|
strings_by_library[metadata.library_name].add(string.s)
|
||||||
|
|
||||||
|
console = rich.get_console()
|
||||||
|
console.print("found libraries:", style="bold")
|
||||||
|
for library, strings in sorted(strings_by_library.items(), key=lambda p: len(p[1]), reverse=True):
|
||||||
|
console.print(f" - [b]{library}[/] ({len(strings)} strings)")
|
||||||
|
|
||||||
|
for string in sorted(strings)[:10]:
|
||||||
|
console.print(f" - {string}", markup=False, style="grey37")
|
||||||
|
|
||||||
|
if len(strings) > 10:
|
||||||
|
console.print(" ...", style="grey37")
|
||||||
|
|
||||||
|
if not strings_by_library:
|
||||||
|
console.print(" (none)", style="grey37")
|
||||||
|
# since we're not going to find any strings
|
||||||
|
# return early and don't do IDA analysis
|
||||||
|
return
|
||||||
|
|
||||||
|
open_ida(input_path)
|
||||||
|
|
||||||
|
import idaapi
|
||||||
|
import idautils
|
||||||
|
import ida_funcs
|
||||||
|
|
||||||
|
strings_by_function = collections.defaultdict(set)
|
||||||
|
for ea in idautils.Functions():
|
||||||
|
f = idaapi.get_func(ea)
|
||||||
|
|
||||||
|
# ignore library functions and thunk functions as identified by IDA
|
||||||
|
if f.flags & idaapi.FUNC_THUNK:
|
||||||
|
continue
|
||||||
|
if f.flags & idaapi.FUNC_LIB:
|
||||||
|
continue
|
||||||
|
|
||||||
|
for bb in ida_helpers.get_function_blocks(f):
|
||||||
|
for insn in ida_helpers.get_instructions_in_range(bb.start_ea, bb.end_ea):
|
||||||
|
ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
|
||||||
|
if ref == insn.ea:
|
||||||
|
continue
|
||||||
|
|
||||||
|
string = capa.features.extractors.ida.helpers.find_string_at(ref)
|
||||||
|
if not string:
|
||||||
|
continue
|
||||||
|
|
||||||
|
for db in dbs:
|
||||||
|
if metadata := db.metadata_by_string.get(string):
|
||||||
|
strings_by_function[ea].add(string)
|
||||||
|
|
||||||
|
# ensure there are at least XXX functions renamed, or ignore those entries
|
||||||
|
|
||||||
|
console.print("functions:", style="bold")
|
||||||
|
for function, strings in sorted(strings_by_function.items()):
|
||||||
|
if strings:
|
||||||
|
name = ida_funcs.get_func_name(function)
|
||||||
|
|
||||||
|
console.print(f" [b]{name}[/]@{function:08x}:")
|
||||||
|
|
||||||
|
for string in strings:
|
||||||
|
for db in dbs:
|
||||||
|
if metadata := db.metadata_by_string.get(string):
|
||||||
|
location = Text(
|
||||||
|
f"{metadata.library_name}@{metadata.library_version}::{metadata.function_name}",
|
||||||
|
style="grey37",
|
||||||
|
)
|
||||||
|
console.print(" - ", location, ": ", string.rstrip())
|
||||||
|
|
||||||
|
console.print()
|
||||||
|
|
||||||
|
console.print(
|
||||||
|
f"found {len(strings_by_function)} library functions across {len(list(idautils.Functions()))} functions"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
BIN
capa/analysis/strings/data/crt/msvc_v143.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/crt/msvc_v143.jsonl.gz
Normal file
Binary file not shown.
3
capa/analysis/strings/data/oss/.gitignore
vendored
Normal file
3
capa/analysis/strings/data/oss/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
*.csv
|
||||||
|
*.jsonl
|
||||||
|
*.jsonl.gz
|
||||||
BIN
capa/analysis/strings/data/oss/brotli.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/brotli.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/bzip2.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/bzip2.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/cryptopp.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/cryptopp.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/curl.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/curl.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/detours.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/detours.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/jemalloc.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/jemalloc.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/jsoncpp.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/jsoncpp.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/kcp.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/kcp.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/liblzma.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/liblzma.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/libpcap.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/libpcap.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/libsodium.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/libsodium.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/mbedtls.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/mbedtls.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/openssl.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/openssl.jsonl.gz
Normal file
Binary file not shown.
99
capa/analysis/strings/data/oss/readme.md
Normal file
99
capa/analysis/strings/data/oss/readme.md
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
# Strings from Open Source libraries
|
||||||
|
|
||||||
|
This directory contains databases of strings extracted from open soure software.
|
||||||
|
capa uses these databases to ignore functions that are likely library code.
|
||||||
|
|
||||||
|
There is one file for each database. Each database is a gzip-compressed, JSONL (one JSON document per line) file.
|
||||||
|
The JSON document looks like this:
|
||||||
|
|
||||||
|
string: "1.0.8, 13-Jul-2019"
|
||||||
|
library_name: "bzip2"
|
||||||
|
library_version: "1.0.8#3"
|
||||||
|
file_path: "CMakeFiles/bz2.dir/bzlib.c.obj"
|
||||||
|
function_name: "BZ2_bzlibVersion"
|
||||||
|
line_number: null
|
||||||
|
|
||||||
|
The following databases were extracted via the vkpkg & jh technique:
|
||||||
|
|
||||||
|
- brotli 1.0.9#5
|
||||||
|
- bzip2 1.0.8#3
|
||||||
|
- cryptopp 8.7.0
|
||||||
|
- curl 7.86.0#1
|
||||||
|
- detours 4.0.1#7
|
||||||
|
- jemalloc 5.3.0#1
|
||||||
|
- jsoncpp 1.9.5
|
||||||
|
- kcp 1.7
|
||||||
|
- liblzma 5.2.5#6
|
||||||
|
- libsodium 1.0.18#8
|
||||||
|
- libpcap 1.10.1#3
|
||||||
|
- mbedtls 2.28.1
|
||||||
|
- openssl 3.0.7#1
|
||||||
|
- sqlite3 3.40.0#1
|
||||||
|
- tomcrypt 1.18.2#2
|
||||||
|
- wolfssl 5.5.0
|
||||||
|
- zlib 1.2.13
|
||||||
|
|
||||||
|
This code was originally developed in FLOSS and imported into capa.
|
||||||
|
|
||||||
|
## The vkpkg & jh technique
|
||||||
|
|
||||||
|
Major steps:
|
||||||
|
|
||||||
|
1. build static libraries via vcpkg
|
||||||
|
2. extract features via jh
|
||||||
|
3. convert to JSONL format with `jh_to_qs.py`
|
||||||
|
4. compress with gzip
|
||||||
|
|
||||||
|
### Build static libraries via vcpkg
|
||||||
|
|
||||||
|
[vcpkg](https://vcpkg.io/en/) is a free C/C++ package manager for acquiring and managing libraries.
|
||||||
|
We use it to easily build common open source libraries, like zlib.
|
||||||
|
Use the triplet `x64-windows-static` to build static archives (.lib files that are AR archives containing COFF object files):
|
||||||
|
|
||||||
|
```console
|
||||||
|
PS > C:\vcpkg\vcpkg.exe install --triplet x64-windows-static zlib
|
||||||
|
```
|
||||||
|
|
||||||
|
### Extract features via jh
|
||||||
|
|
||||||
|
[jh](https://github.com/williballenthin/lancelot/blob/master/bin/src/bin/jh.rs)
|
||||||
|
is a lancelot-based utility that parses AR archives containing COFF object files,
|
||||||
|
reconstructs their control flow, finds functions, and extracts features.
|
||||||
|
jh extracts numbers, API calls, and strings; we are only interested in the string features.
|
||||||
|
|
||||||
|
For each feature, jh emits a CSV line with the fields
|
||||||
|
- target triplet
|
||||||
|
- compiler
|
||||||
|
- library
|
||||||
|
- version
|
||||||
|
- build profile
|
||||||
|
- path
|
||||||
|
- function
|
||||||
|
- feature type
|
||||||
|
- feature value
|
||||||
|
|
||||||
|
For example:
|
||||||
|
|
||||||
|
```csv
|
||||||
|
x64-windows-static,msvc143,bzip2,1.0.8#3,release,CMakeFiles/bz2.dir/bzlib.c.obj,BZ2_bzBuffToBuffCompress,number,0x00000100
|
||||||
|
```
|
||||||
|
|
||||||
|
For example, to invoke jh:
|
||||||
|
|
||||||
|
```console
|
||||||
|
$ ~/lancelot/target/release/jh x64-windows-static msvc143 zlib 1.2.13 release /mnt/c/vcpkg/installed/x64-windows-static/lib/zlib.lib > ~/flare-floss/floss/qs/db/data/oss/zlib.csv
|
||||||
|
```
|
||||||
|
|
||||||
|
### Convert to OSS database format
|
||||||
|
|
||||||
|
We use the script `jh_to_qs.py` to convert these CSV lines into JSONL file prepared for FLOSS:
|
||||||
|
|
||||||
|
```console
|
||||||
|
$ python3 jh_to_qs.py zlib.csv > zlib.jsonl
|
||||||
|
```
|
||||||
|
|
||||||
|
These files are then gzip'd:
|
||||||
|
|
||||||
|
```console
|
||||||
|
$ gzip -c zlib.jsonl > zlib.jsonl.gz
|
||||||
|
```
|
||||||
BIN
capa/analysis/strings/data/oss/sqlite3.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/sqlite3.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/tomcrypt.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/tomcrypt.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/wolfssl.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/wolfssl.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/oss/zlib.jsonl.gz
Normal file
BIN
capa/analysis/strings/data/oss/zlib.jsonl.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/winapi/apis.txt.gz
Normal file
BIN
capa/analysis/strings/data/winapi/apis.txt.gz
Normal file
Binary file not shown.
BIN
capa/analysis/strings/data/winapi/dlls.txt.gz
Normal file
BIN
capa/analysis/strings/data/winapi/dlls.txt.gz
Normal file
Binary file not shown.
@@ -9,7 +9,7 @@
|
|||||||
import logging
|
import logging
|
||||||
import itertools
|
import itertools
|
||||||
import collections
|
import collections
|
||||||
from typing import Any, Tuple
|
from typing import Any
|
||||||
|
|
||||||
from capa.rules import Scope, RuleSet
|
from capa.rules import Scope, RuleSet
|
||||||
from capa.engine import FeatureSet, MatchResults
|
from capa.engine import FeatureSet, MatchResults
|
||||||
@@ -64,7 +64,7 @@ def has_file_limitation(rules: RuleSet, capabilities: MatchResults, is_standalon
|
|||||||
|
|
||||||
def find_capabilities(
|
def find_capabilities(
|
||||||
ruleset: RuleSet, extractor: FeatureExtractor, disable_progress=None, **kwargs
|
ruleset: RuleSet, extractor: FeatureExtractor, disable_progress=None, **kwargs
|
||||||
) -> Tuple[MatchResults, Any]:
|
) -> tuple[MatchResults, Any]:
|
||||||
from capa.capabilities.static import find_static_capabilities
|
from capa.capabilities.static import find_static_capabilities
|
||||||
from capa.capabilities.dynamic import find_dynamic_capabilities
|
from capa.capabilities.dynamic import find_dynamic_capabilities
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
import logging
|
import logging
|
||||||
import itertools
|
import itertools
|
||||||
import collections
|
import collections
|
||||||
from typing import Any, List, Tuple
|
from typing import Any
|
||||||
|
|
||||||
import capa.perf
|
import capa.perf
|
||||||
import capa.features.freeze as frz
|
import capa.features.freeze as frz
|
||||||
@@ -24,7 +24,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
def find_call_capabilities(
|
def find_call_capabilities(
|
||||||
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
||||||
) -> Tuple[FeatureSet, MatchResults]:
|
) -> tuple[FeatureSet, MatchResults]:
|
||||||
"""
|
"""
|
||||||
find matches for the given rules for the given call.
|
find matches for the given rules for the given call.
|
||||||
|
|
||||||
@@ -51,7 +51,7 @@ def find_call_capabilities(
|
|||||||
|
|
||||||
def find_thread_capabilities(
|
def find_thread_capabilities(
|
||||||
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle
|
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle
|
||||||
) -> Tuple[FeatureSet, MatchResults, MatchResults]:
|
) -> tuple[FeatureSet, MatchResults, MatchResults]:
|
||||||
"""
|
"""
|
||||||
find matches for the given rules within the given thread.
|
find matches for the given rules within the given thread.
|
||||||
|
|
||||||
@@ -89,7 +89,7 @@ def find_thread_capabilities(
|
|||||||
|
|
||||||
def find_process_capabilities(
|
def find_process_capabilities(
|
||||||
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle
|
ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle
|
||||||
) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
|
) -> tuple[MatchResults, MatchResults, MatchResults, int]:
|
||||||
"""
|
"""
|
||||||
find matches for the given rules within the given process.
|
find matches for the given rules within the given process.
|
||||||
|
|
||||||
@@ -127,7 +127,7 @@ def find_process_capabilities(
|
|||||||
|
|
||||||
def find_dynamic_capabilities(
|
def find_dynamic_capabilities(
|
||||||
ruleset: RuleSet, extractor: DynamicFeatureExtractor, disable_progress=None
|
ruleset: RuleSet, extractor: DynamicFeatureExtractor, disable_progress=None
|
||||||
) -> Tuple[MatchResults, Any]:
|
) -> tuple[MatchResults, Any]:
|
||||||
all_process_matches: MatchResults = collections.defaultdict(list)
|
all_process_matches: MatchResults = collections.defaultdict(list)
|
||||||
all_thread_matches: MatchResults = collections.defaultdict(list)
|
all_thread_matches: MatchResults = collections.defaultdict(list)
|
||||||
all_call_matches: MatchResults = collections.defaultdict(list)
|
all_call_matches: MatchResults = collections.defaultdict(list)
|
||||||
@@ -135,7 +135,7 @@ def find_dynamic_capabilities(
|
|||||||
feature_counts = rdoc.DynamicFeatureCounts(file=0, processes=())
|
feature_counts = rdoc.DynamicFeatureCounts(file=0, processes=())
|
||||||
|
|
||||||
assert isinstance(extractor, DynamicFeatureExtractor)
|
assert isinstance(extractor, DynamicFeatureExtractor)
|
||||||
processes: List[ProcessHandle] = list(extractor.get_processes())
|
processes: list[ProcessHandle] = list(extractor.get_processes())
|
||||||
n_processes: int = len(processes)
|
n_processes: int = len(processes)
|
||||||
|
|
||||||
with capa.helpers.CapaProgressBar(
|
with capa.helpers.CapaProgressBar(
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import time
|
|||||||
import logging
|
import logging
|
||||||
import itertools
|
import itertools
|
||||||
import collections
|
import collections
|
||||||
from typing import Any, List, Tuple
|
from typing import Any
|
||||||
|
|
||||||
import capa.perf
|
import capa.perf
|
||||||
import capa.helpers
|
import capa.helpers
|
||||||
@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
def find_instruction_capabilities(
|
def find_instruction_capabilities(
|
||||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
|
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
|
||||||
) -> Tuple[FeatureSet, MatchResults]:
|
) -> tuple[FeatureSet, MatchResults]:
|
||||||
"""
|
"""
|
||||||
find matches for the given rules for the given instruction.
|
find matches for the given rules for the given instruction.
|
||||||
|
|
||||||
@@ -53,7 +53,7 @@ def find_instruction_capabilities(
|
|||||||
|
|
||||||
def find_basic_block_capabilities(
|
def find_basic_block_capabilities(
|
||||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle
|
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle
|
||||||
) -> Tuple[FeatureSet, MatchResults, MatchResults]:
|
) -> tuple[FeatureSet, MatchResults, MatchResults]:
|
||||||
"""
|
"""
|
||||||
find matches for the given rules within the given basic block.
|
find matches for the given rules within the given basic block.
|
||||||
|
|
||||||
@@ -93,7 +93,7 @@ def find_basic_block_capabilities(
|
|||||||
|
|
||||||
def find_code_capabilities(
|
def find_code_capabilities(
|
||||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, fh: FunctionHandle
|
ruleset: RuleSet, extractor: StaticFeatureExtractor, fh: FunctionHandle
|
||||||
) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
|
) -> tuple[MatchResults, MatchResults, MatchResults, int]:
|
||||||
"""
|
"""
|
||||||
find matches for the given rules within the given function.
|
find matches for the given rules within the given function.
|
||||||
|
|
||||||
@@ -131,16 +131,16 @@ def find_code_capabilities(
|
|||||||
|
|
||||||
def find_static_capabilities(
|
def find_static_capabilities(
|
||||||
ruleset: RuleSet, extractor: StaticFeatureExtractor, disable_progress=None
|
ruleset: RuleSet, extractor: StaticFeatureExtractor, disable_progress=None
|
||||||
) -> Tuple[MatchResults, Any]:
|
) -> tuple[MatchResults, Any]:
|
||||||
all_function_matches: MatchResults = collections.defaultdict(list)
|
all_function_matches: MatchResults = collections.defaultdict(list)
|
||||||
all_bb_matches: MatchResults = collections.defaultdict(list)
|
all_bb_matches: MatchResults = collections.defaultdict(list)
|
||||||
all_insn_matches: MatchResults = collections.defaultdict(list)
|
all_insn_matches: MatchResults = collections.defaultdict(list)
|
||||||
|
|
||||||
feature_counts = rdoc.StaticFeatureCounts(file=0, functions=())
|
feature_counts = rdoc.StaticFeatureCounts(file=0, functions=())
|
||||||
library_functions: Tuple[rdoc.LibraryFunction, ...] = ()
|
library_functions: tuple[rdoc.LibraryFunction, ...] = ()
|
||||||
|
|
||||||
assert isinstance(extractor, StaticFeatureExtractor)
|
assert isinstance(extractor, StaticFeatureExtractor)
|
||||||
functions: List[FunctionHandle] = list(extractor.get_functions())
|
functions: list[FunctionHandle] = list(extractor.get_functions())
|
||||||
n_funcs: int = len(functions)
|
n_funcs: int = len(functions)
|
||||||
n_libs: int = 0
|
n_libs: int = 0
|
||||||
percentage: float = 0
|
percentage: float = 0
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
import copy
|
import copy
|
||||||
import collections
|
import collections
|
||||||
from typing import TYPE_CHECKING, Set, Dict, List, Tuple, Union, Mapping, Iterable, Iterator
|
from typing import TYPE_CHECKING, Union, Mapping, Iterable, Iterator
|
||||||
|
|
||||||
import capa.perf
|
import capa.perf
|
||||||
import capa.features.common
|
import capa.features.common
|
||||||
@@ -27,7 +27,7 @@ if TYPE_CHECKING:
|
|||||||
# to collect the locations of a feature, do: `features[Number(0x10)]`
|
# to collect the locations of a feature, do: `features[Number(0x10)]`
|
||||||
#
|
#
|
||||||
# aliased here so that the type can be documented and xref'd.
|
# aliased here so that the type can be documented and xref'd.
|
||||||
FeatureSet = Dict[Feature, Set[Address]]
|
FeatureSet = dict[Feature, set[Address]]
|
||||||
|
|
||||||
|
|
||||||
class Statement:
|
class Statement:
|
||||||
@@ -94,7 +94,7 @@ class And(Statement):
|
|||||||
match if all of the children evaluate to True.
|
match if all of the children evaluate to True.
|
||||||
|
|
||||||
the order of evaluation is dictated by the property
|
the order of evaluation is dictated by the property
|
||||||
`And.children` (type: List[Statement|Feature]).
|
`And.children` (type: list[Statement|Feature]).
|
||||||
a query optimizer may safely manipulate the order of these children.
|
a query optimizer may safely manipulate the order of these children.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -127,7 +127,7 @@ class Or(Statement):
|
|||||||
match if any of the children evaluate to True.
|
match if any of the children evaluate to True.
|
||||||
|
|
||||||
the order of evaluation is dictated by the property
|
the order of evaluation is dictated by the property
|
||||||
`Or.children` (type: List[Statement|Feature]).
|
`Or.children` (type: list[Statement|Feature]).
|
||||||
a query optimizer may safely manipulate the order of these children.
|
a query optimizer may safely manipulate the order of these children.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -176,7 +176,7 @@ class Some(Statement):
|
|||||||
match if at least N of the children evaluate to True.
|
match if at least N of the children evaluate to True.
|
||||||
|
|
||||||
the order of evaluation is dictated by the property
|
the order of evaluation is dictated by the property
|
||||||
`Some.children` (type: List[Statement|Feature]).
|
`Some.children` (type: list[Statement|Feature]).
|
||||||
a query optimizer may safely manipulate the order of these children.
|
a query optimizer may safely manipulate the order of these children.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -267,7 +267,7 @@ class Subscope(Statement):
|
|||||||
# inspect(match_details)
|
# inspect(match_details)
|
||||||
#
|
#
|
||||||
# aliased here so that the type can be documented and xref'd.
|
# aliased here so that the type can be documented and xref'd.
|
||||||
MatchResults = Mapping[str, List[Tuple[Address, Result]]]
|
MatchResults = Mapping[str, list[tuple[Address, Result]]]
|
||||||
|
|
||||||
|
|
||||||
def get_rule_namespaces(rule: "capa.rules.Rule") -> Iterator[str]:
|
def get_rule_namespaces(rule: "capa.rules.Rule") -> Iterator[str]:
|
||||||
@@ -292,7 +292,7 @@ def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations:
|
|||||||
features[capa.features.common.MatchedRule(namespace)].update(locations)
|
features[capa.features.common.MatchedRule(namespace)].update(locations)
|
||||||
|
|
||||||
|
|
||||||
def match(rules: List["capa.rules.Rule"], features: FeatureSet, addr: Address) -> Tuple[FeatureSet, MatchResults]:
|
def match(rules: list["capa.rules.Rule"], features: FeatureSet, addr: Address) -> tuple[FeatureSet, MatchResults]:
|
||||||
"""
|
"""
|
||||||
match the given rules against the given features,
|
match the given rules against the given features,
|
||||||
returning an updated set of features and the matches.
|
returning an updated set of features and the matches.
|
||||||
|
|||||||
@@ -6,7 +6,6 @@
|
|||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Dict, List
|
|
||||||
|
|
||||||
from capa.helpers import assert_never
|
from capa.helpers import assert_never
|
||||||
|
|
||||||
@@ -22,7 +21,7 @@ COM_PREFIXES = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def load_com_database(com_type: ComType) -> Dict[str, List[str]]:
|
def load_com_database(com_type: ComType) -> dict[str, list[str]]:
|
||||||
# lazy load these python files since they are so large.
|
# lazy load these python files since they are so large.
|
||||||
# that is, don't load them unless a COM feature is being handled.
|
# that is, don't load them unless a COM feature is being handled.
|
||||||
import capa.features.com.classes
|
import capa.features.com.classes
|
||||||
|
|||||||
@@ -5,9 +5,8 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Dict, List
|
|
||||||
|
|
||||||
COM_CLASSES: Dict[str, List[str]] = {
|
COM_CLASSES: dict[str, list[str]] = {
|
||||||
"ClusAppWiz": ["24F97150-6689-11D1-9AA7-00C04FB93A80"],
|
"ClusAppWiz": ["24F97150-6689-11D1-9AA7-00C04FB93A80"],
|
||||||
"ClusCfgAddNodesWizard": ["BB8D141E-C00A-469F-BC5C-ECD814F0BD74"],
|
"ClusCfgAddNodesWizard": ["BB8D141E-C00A-469F-BC5C-ECD814F0BD74"],
|
||||||
"ClusCfgCreateClusterWizard": ["B929818E-F5B0-44DC-8A00-1B5F5F5AA1F0"],
|
"ClusCfgCreateClusterWizard": ["B929818E-F5B0-44DC-8A00-1B5F5F5AA1F0"],
|
||||||
|
|||||||
@@ -5,9 +5,8 @@
|
|||||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
from typing import Dict, List
|
|
||||||
|
|
||||||
COM_INTERFACES: Dict[str, List[str]] = {
|
COM_INTERFACES: dict[str, list[str]] = {
|
||||||
"IClusterApplicationWizard": ["24F97151-6689-11D1-9AA7-00C04FB93A80"],
|
"IClusterApplicationWizard": ["24F97151-6689-11D1-9AA7-00C04FB93A80"],
|
||||||
"IWEExtendWizard97": ["97DEDE68-FC6B-11CF-B5F5-00A0C90AB505"],
|
"IWEExtendWizard97": ["97DEDE68-FC6B-11CF-B5F5-00A0C90AB505"],
|
||||||
"IWCWizard97Callback": ["97DEDE67-FC6B-11CF-B5F5-00A0C90AB505"],
|
"IWCWizard97Callback": ["97DEDE67-FC6B-11CF-B5F5-00A0C90AB505"],
|
||||||
@@ -16334,7 +16333,7 @@ COM_INTERFACES: Dict[str, List[str]] = {
|
|||||||
"IRcsServiceDescription": ["416437de-e78b-44c9-990f-7ede1f2a0c91"],
|
"IRcsServiceDescription": ["416437de-e78b-44c9-990f-7ede1f2a0c91"],
|
||||||
"IRcsServiceKindSupportedChangedEventArgs": ["f47ea244-e783-4866-b3a7-4e5ccf023070"],
|
"IRcsServiceKindSupportedChangedEventArgs": ["f47ea244-e783-4866-b3a7-4e5ccf023070"],
|
||||||
"IRcsServiceStatusChangedArgs": ["661ae45a-412a-460d-bdd4-dd8ea3c15583"],
|
"IRcsServiceStatusChangedArgs": ["661ae45a-412a-460d-bdd4-dd8ea3c15583"],
|
||||||
"IRcsServiceTuple": ["ce17a39b-2e8b-41af-b5a9-5cb072cc373c"],
|
"IRcsServicetuple": ["ce17a39b-2e8b-41af-b5a9-5cb072cc373c"],
|
||||||
"IRcsSubscriptionReceivedArgs": ["04eaf06d-42bc-46cc-a637-eeb3a8723fe4"],
|
"IRcsSubscriptionReceivedArgs": ["04eaf06d-42bc-46cc-a637-eeb3a8723fe4"],
|
||||||
"IRcsTransport": ["fea34759-f37c-4319-8546-ec84d21d30ff"],
|
"IRcsTransport": ["fea34759-f37c-4319-8546-ec84d21d30ff"],
|
||||||
"IRcsTransportConfiguration": ["1fccb102-2472-4bb9-9988-c1211c83e8a9"],
|
"IRcsTransportConfiguration": ["1fccb102-2472-4bb9-9988-c1211c83e8a9"],
|
||||||
|
|||||||
@@ -9,10 +9,9 @@
|
|||||||
import re
|
import re
|
||||||
import abc
|
import abc
|
||||||
import codecs
|
import codecs
|
||||||
import typing
|
|
||||||
import logging
|
import logging
|
||||||
import collections
|
import collections
|
||||||
from typing import TYPE_CHECKING, Set, Dict, List, Union, Optional
|
from typing import TYPE_CHECKING, Union, Optional
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
# circular import, otherwise
|
# circular import, otherwise
|
||||||
@@ -79,8 +78,8 @@ class Result:
|
|||||||
self,
|
self,
|
||||||
success: bool,
|
success: bool,
|
||||||
statement: Union["capa.engine.Statement", "Feature"],
|
statement: Union["capa.engine.Statement", "Feature"],
|
||||||
children: List["Result"],
|
children: list["Result"],
|
||||||
locations: Optional[Set[Address]] = None,
|
locations: Optional[set[Address]] = None,
|
||||||
):
|
):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.success = success
|
self.success = success
|
||||||
@@ -213,7 +212,7 @@ class Substring(String):
|
|||||||
|
|
||||||
# mapping from string value to list of locations.
|
# mapping from string value to list of locations.
|
||||||
# will unique the locations later on.
|
# will unique the locations later on.
|
||||||
matches: typing.DefaultDict[str, Set[Address]] = collections.defaultdict(set)
|
matches: collections.defaultdict[str, set[Address]] = collections.defaultdict(set)
|
||||||
|
|
||||||
assert isinstance(self.value, str)
|
assert isinstance(self.value, str)
|
||||||
for feature, locations in features.items():
|
for feature, locations in features.items():
|
||||||
@@ -261,7 +260,7 @@ class _MatchedSubstring(Substring):
|
|||||||
note: this type should only ever be constructed by `Substring.evaluate()`. it is not part of the public API.
|
note: this type should only ever be constructed by `Substring.evaluate()`. it is not part of the public API.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, substring: Substring, matches: Dict[str, Set[Address]]):
|
def __init__(self, substring: Substring, matches: dict[str, set[Address]]):
|
||||||
"""
|
"""
|
||||||
args:
|
args:
|
||||||
substring: the substring feature that matches.
|
substring: the substring feature that matches.
|
||||||
@@ -305,7 +304,7 @@ class Regex(String):
|
|||||||
|
|
||||||
# mapping from string value to list of locations.
|
# mapping from string value to list of locations.
|
||||||
# will unique the locations later on.
|
# will unique the locations later on.
|
||||||
matches: typing.DefaultDict[str, Set[Address]] = collections.defaultdict(set)
|
matches: collections.defaultdict[str, set[Address]] = collections.defaultdict(set)
|
||||||
|
|
||||||
for feature, locations in features.items():
|
for feature, locations in features.items():
|
||||||
if not isinstance(feature, (String,)):
|
if not isinstance(feature, (String,)):
|
||||||
@@ -353,7 +352,7 @@ class _MatchedRegex(Regex):
|
|||||||
note: this type should only ever be constructed by `Regex.evaluate()`. it is not part of the public API.
|
note: this type should only ever be constructed by `Regex.evaluate()`. it is not part of the public API.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, regex: Regex, matches: Dict[str, Set[Address]]):
|
def __init__(self, regex: Regex, matches: dict[str, set[Address]]):
|
||||||
"""
|
"""
|
||||||
args:
|
args:
|
||||||
regex: the regex feature that matches.
|
regex: the regex feature that matches.
|
||||||
@@ -467,6 +466,7 @@ FORMAT_VMRAY = "vmray"
|
|||||||
FORMAT_BINEXPORT2 = "binexport2"
|
FORMAT_BINEXPORT2 = "binexport2"
|
||||||
FORMAT_FREEZE = "freeze"
|
FORMAT_FREEZE = "freeze"
|
||||||
FORMAT_RESULT = "result"
|
FORMAT_RESULT = "result"
|
||||||
|
FORMAT_BINJA_DB = "binja_database"
|
||||||
STATIC_FORMATS = {
|
STATIC_FORMATS = {
|
||||||
FORMAT_SC32,
|
FORMAT_SC32,
|
||||||
FORMAT_SC64,
|
FORMAT_SC64,
|
||||||
@@ -476,6 +476,7 @@ STATIC_FORMATS = {
|
|||||||
FORMAT_FREEZE,
|
FORMAT_FREEZE,
|
||||||
FORMAT_RESULT,
|
FORMAT_RESULT,
|
||||||
FORMAT_BINEXPORT2,
|
FORMAT_BINEXPORT2,
|
||||||
|
FORMAT_BINJA_DB,
|
||||||
}
|
}
|
||||||
DYNAMIC_FORMATS = {
|
DYNAMIC_FORMATS = {
|
||||||
FORMAT_CAPE,
|
FORMAT_CAPE,
|
||||||
|
|||||||
@@ -11,13 +11,9 @@ import hashlib
|
|||||||
import dataclasses
|
import dataclasses
|
||||||
from copy import copy
|
from copy import copy
|
||||||
from types import MethodType
|
from types import MethodType
|
||||||
from typing import Any, Set, Dict, Tuple, Union, Iterator
|
from typing import Any, Union, Iterator, TypeAlias
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
# TODO(williballenthin): use typing.TypeAlias directly when Python 3.9 is deprecated
|
|
||||||
# https://github.com/mandiant/capa/issues/1699
|
|
||||||
from typing_extensions import TypeAlias
|
|
||||||
|
|
||||||
import capa.features.address
|
import capa.features.address
|
||||||
from capa.features.common import Feature
|
from capa.features.common import Feature
|
||||||
from capa.features.address import Address, ThreadAddress, ProcessAddress, DynamicCallAddress, AbsoluteVirtualAddress
|
from capa.features.address import Address, ThreadAddress, ProcessAddress, DynamicCallAddress, AbsoluteVirtualAddress
|
||||||
@@ -59,7 +55,7 @@ class FunctionHandle:
|
|||||||
|
|
||||||
address: Address
|
address: Address
|
||||||
inner: Any
|
inner: Any
|
||||||
ctx: Dict[str, Any] = dataclasses.field(default_factory=dict)
|
ctx: dict[str, Any] = dataclasses.field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -135,7 +131,7 @@ class StaticFeatureExtractor:
|
|||||||
return self._sample_hashes
|
return self._sample_hashes
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract features found at every scope ("global").
|
extract features found at every scope ("global").
|
||||||
|
|
||||||
@@ -146,12 +142,12 @@ class StaticFeatureExtractor:
|
|||||||
print('0x%x: %s', va, feature)
|
print('0x%x: %s', va, feature)
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, Address]: feature and its location
|
tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract file-scope features.
|
extract file-scope features.
|
||||||
|
|
||||||
@@ -162,7 +158,7 @@ class StaticFeatureExtractor:
|
|||||||
print('0x%x: %s', va, feature)
|
print('0x%x: %s', va, feature)
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, Address]: feature and its location
|
tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@@ -211,7 +207,7 @@ class StaticFeatureExtractor:
|
|||||||
raise KeyError(addr)
|
raise KeyError(addr)
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_function_features(self, f: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_function_features(self, f: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract function-scope features.
|
extract function-scope features.
|
||||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||||
@@ -227,7 +223,7 @@ class StaticFeatureExtractor:
|
|||||||
f [FunctionHandle]: an opaque value previously fetched from `.get_functions()`.
|
f [FunctionHandle]: an opaque value previously fetched from `.get_functions()`.
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, Address]: feature and its location
|
tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@@ -240,7 +236,7 @@ class StaticFeatureExtractor:
|
|||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract basic block-scope features.
|
extract basic block-scope features.
|
||||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||||
@@ -258,7 +254,7 @@ class StaticFeatureExtractor:
|
|||||||
bb [BBHandle]: an opaque value previously fetched from `.get_basic_blocks()`.
|
bb [BBHandle]: an opaque value previously fetched from `.get_basic_blocks()`.
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, Address]: feature and its location
|
tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@@ -273,7 +269,7 @@ class StaticFeatureExtractor:
|
|||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_insn_features(
|
def extract_insn_features(
|
||||||
self, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
|
self, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract instruction-scope features.
|
extract instruction-scope features.
|
||||||
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
the arguments are opaque values previously provided by `.get_functions()`, etc.
|
||||||
@@ -293,12 +289,12 @@ class StaticFeatureExtractor:
|
|||||||
insn [InsnHandle]: an opaque value previously fetched from `.get_instructions()`.
|
insn [InsnHandle]: an opaque value previously fetched from `.get_instructions()`.
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, Address]: feature and its location
|
tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
|
||||||
def FunctionFilter(extractor: StaticFeatureExtractor, functions: Set) -> StaticFeatureExtractor:
|
def FunctionFilter(extractor: StaticFeatureExtractor, functions: set) -> StaticFeatureExtractor:
|
||||||
original_get_functions = extractor.get_functions
|
original_get_functions = extractor.get_functions
|
||||||
|
|
||||||
def filtered_get_functions(self):
|
def filtered_get_functions(self):
|
||||||
@@ -387,7 +383,7 @@ class DynamicFeatureExtractor:
|
|||||||
return self._sample_hashes
|
return self._sample_hashes
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
|
def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract features found at every scope ("global").
|
extract features found at every scope ("global").
|
||||||
|
|
||||||
@@ -398,12 +394,12 @@ class DynamicFeatureExtractor:
|
|||||||
print(addr, feature)
|
print(addr, feature)
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, Address]: feature and its location
|
tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
|
def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
extract file-scope features.
|
extract file-scope features.
|
||||||
|
|
||||||
@@ -414,7 +410,7 @@ class DynamicFeatureExtractor:
|
|||||||
print(addr, feature)
|
print(addr, feature)
|
||||||
|
|
||||||
yields:
|
yields:
|
||||||
Tuple[Feature, Address]: feature and its location
|
tuple[Feature, Address]: feature and its location
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@@ -426,7 +422,7 @@ class DynamicFeatureExtractor:
|
|||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
Yields all the features of a process. These include:
|
Yields all the features of a process. These include:
|
||||||
- file features of the process' image
|
- file features of the process' image
|
||||||
@@ -449,7 +445,7 @@ class DynamicFeatureExtractor:
|
|||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
Yields all the features of a thread. These include:
|
Yields all the features of a thread. These include:
|
||||||
- sequenced api traces
|
- sequenced api traces
|
||||||
@@ -466,7 +462,7 @@ class DynamicFeatureExtractor:
|
|||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def extract_call_features(
|
def extract_call_features(
|
||||||
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
|
||||||
) -> Iterator[Tuple[Feature, Address]]:
|
) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""
|
"""
|
||||||
Yields all features of a call. These include:
|
Yields all features of a call. These include:
|
||||||
- api name
|
- api name
|
||||||
@@ -485,7 +481,7 @@ class DynamicFeatureExtractor:
|
|||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
|
||||||
def ProcessFilter(extractor: DynamicFeatureExtractor, processes: Set) -> DynamicFeatureExtractor:
|
def ProcessFilter(extractor: DynamicFeatureExtractor, processes: set) -> DynamicFeatureExtractor:
|
||||||
original_get_processes = extractor.get_processes
|
original_get_processes = extractor.get_processes
|
||||||
|
|
||||||
def filtered_get_processes(self):
|
def filtered_get_processes(self):
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ import io
|
|||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import contextlib
|
import contextlib
|
||||||
from typing import Set, Dict, List, Tuple, Iterator
|
from typing import Iterator
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
@@ -51,13 +51,13 @@ def compute_common_prefix_length(m: str, n: str) -> int:
|
|||||||
return len(m)
|
return len(m)
|
||||||
|
|
||||||
|
|
||||||
def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths: List[Path]) -> Path:
|
def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths: list[Path]) -> Path:
|
||||||
"""attempt to find the sample file, given a BinExport2 file.
|
"""attempt to find the sample file, given a BinExport2 file.
|
||||||
|
|
||||||
searches in the same directory as the BinExport2 file, and then in search_paths.
|
searches in the same directory as the BinExport2 file, and then in search_paths.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def filename_similarity_key(p: Path) -> Tuple[int, str]:
|
def filename_similarity_key(p: Path) -> tuple[int, str]:
|
||||||
# note closure over input_file.
|
# note closure over input_file.
|
||||||
# sort first by length of common prefix, then by name (for stability)
|
# sort first by length of common prefix, then by name (for stability)
|
||||||
return (compute_common_prefix_length(p.name, input_file.name), p.name)
|
return (compute_common_prefix_length(p.name, input_file.name), p.name)
|
||||||
@@ -65,7 +65,7 @@ def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths:
|
|||||||
wanted_sha256: str = be2.meta_information.executable_id.lower()
|
wanted_sha256: str = be2.meta_information.executable_id.lower()
|
||||||
|
|
||||||
input_directory: Path = input_file.parent
|
input_directory: Path = input_file.parent
|
||||||
siblings: List[Path] = [p for p in input_directory.iterdir() if p.is_file()]
|
siblings: list[Path] = [p for p in input_directory.iterdir() if p.is_file()]
|
||||||
siblings.sort(key=filename_similarity_key, reverse=True)
|
siblings.sort(key=filename_similarity_key, reverse=True)
|
||||||
for sibling in siblings:
|
for sibling in siblings:
|
||||||
# e.g. with open IDA files in the same directory on Windows
|
# e.g. with open IDA files in the same directory on Windows
|
||||||
@@ -74,7 +74,7 @@ def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths:
|
|||||||
return sibling
|
return sibling
|
||||||
|
|
||||||
for search_path in search_paths:
|
for search_path in search_paths:
|
||||||
candidates: List[Path] = [p for p in search_path.iterdir() if p.is_file()]
|
candidates: list[Path] = [p for p in search_path.iterdir() if p.is_file()]
|
||||||
candidates.sort(key=filename_similarity_key, reverse=True)
|
candidates.sort(key=filename_similarity_key, reverse=True)
|
||||||
for candidate in candidates:
|
for candidate in candidates:
|
||||||
with contextlib.suppress(PermissionError):
|
with contextlib.suppress(PermissionError):
|
||||||
@@ -88,27 +88,27 @@ class BinExport2Index:
|
|||||||
def __init__(self, be2: BinExport2):
|
def __init__(self, be2: BinExport2):
|
||||||
self.be2: BinExport2 = be2
|
self.be2: BinExport2 = be2
|
||||||
|
|
||||||
self.callers_by_vertex_index: Dict[int, List[int]] = defaultdict(list)
|
self.callers_by_vertex_index: dict[int, list[int]] = defaultdict(list)
|
||||||
self.callees_by_vertex_index: Dict[int, List[int]] = defaultdict(list)
|
self.callees_by_vertex_index: dict[int, list[int]] = defaultdict(list)
|
||||||
|
|
||||||
# note: flow graph != call graph (vertex)
|
# note: flow graph != call graph (vertex)
|
||||||
self.flow_graph_index_by_address: Dict[int, int] = {}
|
self.flow_graph_index_by_address: dict[int, int] = {}
|
||||||
self.flow_graph_address_by_index: Dict[int, int] = {}
|
self.flow_graph_address_by_index: dict[int, int] = {}
|
||||||
|
|
||||||
# edges that come from the given basic block
|
# edges that come from the given basic block
|
||||||
self.source_edges_by_basic_block_index: Dict[int, List[BinExport2.FlowGraph.Edge]] = defaultdict(list)
|
self.source_edges_by_basic_block_index: dict[int, list[BinExport2.FlowGraph.Edge]] = defaultdict(list)
|
||||||
# edges that end up at the given basic block
|
# edges that end up at the given basic block
|
||||||
self.target_edges_by_basic_block_index: Dict[int, List[BinExport2.FlowGraph.Edge]] = defaultdict(list)
|
self.target_edges_by_basic_block_index: dict[int, list[BinExport2.FlowGraph.Edge]] = defaultdict(list)
|
||||||
|
|
||||||
self.vertex_index_by_address: Dict[int, int] = {}
|
self.vertex_index_by_address: dict[int, int] = {}
|
||||||
|
|
||||||
self.data_reference_index_by_source_instruction_index: Dict[int, List[int]] = defaultdict(list)
|
self.data_reference_index_by_source_instruction_index: dict[int, list[int]] = defaultdict(list)
|
||||||
self.data_reference_index_by_target_address: Dict[int, List[int]] = defaultdict(list)
|
self.data_reference_index_by_target_address: dict[int, list[int]] = defaultdict(list)
|
||||||
self.string_reference_index_by_source_instruction_index: Dict[int, List[int]] = defaultdict(list)
|
self.string_reference_index_by_source_instruction_index: dict[int, list[int]] = defaultdict(list)
|
||||||
|
|
||||||
self.insn_address_by_index: Dict[int, int] = {}
|
self.insn_address_by_index: dict[int, int] = {}
|
||||||
self.insn_index_by_address: Dict[int, int] = {}
|
self.insn_index_by_address: dict[int, int] = {}
|
||||||
self.insn_by_address: Dict[int, BinExport2.Instruction] = {}
|
self.insn_by_address: dict[int, BinExport2.Instruction] = {}
|
||||||
|
|
||||||
# must index instructions first
|
# must index instructions first
|
||||||
self._index_insn_addresses()
|
self._index_insn_addresses()
|
||||||
@@ -208,7 +208,7 @@ class BinExport2Index:
|
|||||||
|
|
||||||
def basic_block_instructions(
|
def basic_block_instructions(
|
||||||
self, basic_block: BinExport2.BasicBlock
|
self, basic_block: BinExport2.BasicBlock
|
||||||
) -> Iterator[Tuple[int, BinExport2.Instruction, int]]:
|
) -> Iterator[tuple[int, BinExport2.Instruction, int]]:
|
||||||
"""
|
"""
|
||||||
For a given basic block, enumerate the instruction indices,
|
For a given basic block, enumerate the instruction indices,
|
||||||
the instruction instances, and their addresses.
|
the instruction instances, and their addresses.
|
||||||
@@ -253,7 +253,7 @@ class BinExport2Analysis:
|
|||||||
self.idx: BinExport2Index = idx
|
self.idx: BinExport2Index = idx
|
||||||
self.buf: bytes = buf
|
self.buf: bytes = buf
|
||||||
self.base_address: int = 0
|
self.base_address: int = 0
|
||||||
self.thunks: Dict[int, int] = {}
|
self.thunks: dict[int, int] = {}
|
||||||
|
|
||||||
self._find_base_address()
|
self._find_base_address()
|
||||||
self._compute_thunks()
|
self._compute_thunks()
|
||||||
@@ -279,12 +279,14 @@ class BinExport2Analysis:
|
|||||||
|
|
||||||
curr_idx: int = idx
|
curr_idx: int = idx
|
||||||
for _ in range(capa.features.common.THUNK_CHAIN_DEPTH_DELTA):
|
for _ in range(capa.features.common.THUNK_CHAIN_DEPTH_DELTA):
|
||||||
thunk_callees: List[int] = self.idx.callees_by_vertex_index[curr_idx]
|
thunk_callees: list[int] = self.idx.callees_by_vertex_index[curr_idx]
|
||||||
# if this doesn't hold, then it doesn't seem like this is a thunk,
|
# If this doesn't hold, then it doesn't seem like this is a thunk,
|
||||||
# because either, len is:
|
# because either, len is:
|
||||||
# 0 and the thunk doesn't point to anything, or
|
# 0 and the thunk doesn't point to anything or is indirect, like `call eax`, or
|
||||||
# >1 and the thunk may end up at many functions.
|
# >1 and the thunk may end up at many functions.
|
||||||
assert len(thunk_callees) == 1, f"thunk @ {hex(addr)} failed"
|
# In any case, this doesn't appear to be the sort of thunk we're looking for.
|
||||||
|
if len(thunk_callees) != 1:
|
||||||
|
break
|
||||||
|
|
||||||
thunked_idx: int = thunk_callees[0]
|
thunked_idx: int = thunk_callees[0]
|
||||||
thunked_vertex: BinExport2.CallGraph.Vertex = self.be2.call_graph.vertex[thunked_idx]
|
thunked_vertex: BinExport2.CallGraph.Vertex = self.be2.call_graph.vertex[thunked_idx]
|
||||||
@@ -324,7 +326,7 @@ class AddressNotMappedError(ReadMemoryError): ...
 @dataclass
 class AddressSpace:
     base_address: int
-    memory_regions: Tuple[MemoryRegion, ...]
+    memory_regions: tuple[MemoryRegion, ...]

     def read_memory(self, address: int, length: int) -> bytes:
         rva: int = address - self.base_address
@@ -337,7 +339,7 @@ class AddressSpace:

     @classmethod
     def from_pe(cls, pe: PE, base_address: int):
-        regions: List[MemoryRegion] = []
+        regions: list[MemoryRegion] = []
         for section in pe.sections:
             address: int = section.VirtualAddress
             size: int = section.Misc_VirtualSize
@@ -355,7 +357,7 @@ class AddressSpace:

     @classmethod
     def from_elf(cls, elf: ELFFile, base_address: int):
-        regions: List[MemoryRegion] = []
+        regions: list[MemoryRegion] = []

         # ELF segments are for runtime data,
         # ELF sections are for link-time data.
@@ -401,9 +403,9 @@ class AnalysisContext:
 class FunctionContext:
     ctx: AnalysisContext
     flow_graph_index: int
-    format: Set[str]
-    os: Set[str]
-    arch: Set[str]
+    format: set[str]
+    os: set[str]
+    arch: set[str]


 @dataclass
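
AddressSpace maps a virtual address back to a file-backed region via its RVA. A self-contained sketch of the `read_memory` idea, assuming a MemoryRegion with `address` (RVA of the region start) and `buf` fields; the real class may differ:

    from dataclasses import dataclass

    @dataclass
    class MemoryRegion:
        address: int  # RVA where the region begins
        buf: bytes    # the region's backing bytes

    @dataclass
    class AddressSpace:
        base_address: int
        memory_regions: tuple[MemoryRegion, ...]

        def read_memory(self, address: int, length: int) -> bytes:
            rva = address - self.base_address
            for region in self.memory_regions:
                if region.address <= rva < region.address + len(region.buf):
                    offset = rva - region.address
                    return region.buf[offset : offset + length]
            raise ValueError(f"address not mapped: {hex(address)}")

    space = AddressSpace(0x400000, (MemoryRegion(0x1000, b"\x90" * 0x100),))
    assert space.read_memory(0x401010, 4) == b"\x90\x90\x90\x90"
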
@@ -6,7 +6,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import List, Tuple, Iterator, Optional
+from typing import Iterator, Optional

 import capa.features.extractors.binexport2.helpers
 from capa.features.insn import MAX_STRUCTURE_SIZE, Number, Offset, OperandNumber, OperandOffset
@@ -30,7 +30,7 @@ logger = logging.getLogger(__name__)

 def extract_insn_number_features(
     fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner

@@ -91,7 +91,7 @@ OFFSET_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_insn_offset_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner

@@ -120,7 +120,7 @@ NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_insn_nzxor_characteristic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner
     be2: BinExport2 = fhi.ctx.be2
@@ -131,7 +131,7 @@ def extract_insn_nzxor_characteristic_features(
     instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index]
     # guaranteed to be simple int/reg operands
     # so we don't have to realize the tree/list.
-    operands: List[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]
+    operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]

     if operands[1] != operands[2]:
         yield Characteristic("nzxor"), ih.address
@@ -146,7 +146,7 @@ INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_function_indirect_call_characteristic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner
     be2: BinExport2 = fhi.ctx.be2
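
Note why the ARM non-zeroing XOR check compares `operands[1]` and `operands[2]`: AArch64 `eor` takes three operands, `eor dst, src1, src2`, and only produces zero when both sources are the same register. A toy illustration (register names here are just strings):

    def is_nzxor_arm(operands: list[str]) -> bool:
        # operands: [dst, src1, src2]
        return operands[1] != operands[2]

    assert not is_nzxor_arm(["x0", "x1", "x1"])  # eor x0, x1, x1 always yields zero
    assert is_nzxor_arm(["x0", "x1", "x2"])      # eor x0, x1, x2 is data-dependent
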
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import List, Optional
+from typing import Optional
 from dataclasses import dataclass

 from capa.features.extractors.binexport2.helpers import get_operand_expressions
@@ -32,7 +32,7 @@ def get_operand_phrase_info(be2: BinExport2, operand: BinExport2.Operand) -> Opt
     #   Base: Any general purpose register
     #   Displacement: An integral offset

-    expressions: List[BinExport2.Expression] = get_operand_expressions(be2, operand)
+    expressions: list[BinExport2.Expression] = get_operand_expressions(be2, operand)

     # skip expression up to and including BinExport2.Expression.DEREFERENCE, assume caller
     # has checked for BinExport2.Expression.DEREFERENCE
@@ -6,7 +6,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import List, Tuple, Iterator
+from typing import Iterator

 import capa.features.extractors.strings
 import capa.features.extractors.binexport2.helpers
@@ -63,7 +63,7 @@ NUMBER_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_insn_number_features(
     fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner

@@ -123,7 +123,7 @@ OFFSET_ZERO_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_insn_offset_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner

@@ -161,7 +161,7 @@ def is_security_cookie(

     # security cookie check should use SP or BP
     op1: BinExport2.Operand = be2.operand[instruction.operand_index[1]]
-    op1_exprs: List[BinExport2.Expression] = [be2.expression[expr_i] for expr_i in op1.expression_index]
+    op1_exprs: list[BinExport2.Expression] = [be2.expression[expr_i] for expr_i in op1.expression_index]
     if all(expr.symbol.lower() not in ("bp", "esp", "ebp", "rbp", "rsp") for expr in op1_exprs):
         return False

@@ -192,7 +192,7 @@ NZXOR_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_insn_nzxor_characteristic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """
     parse non-zeroing XOR instruction from the given instruction.
     ignore expected non-zeroing XORs, e.g. security cookies.
@@ -209,7 +209,7 @@ def extract_insn_nzxor_characteristic_features(
     instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index]
     # guaranteed to be simple int/reg operands
     # so we don't have to realize the tree/list.
-    operands: List[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]
+    operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]

     if operands[0] == operands[1]:
         return
@@ -236,7 +236,7 @@ INDIRECT_CALL_PATTERNS = BinExport2InstructionPatternMatcher.from_str(

 def extract_function_indirect_call_characteristic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner
     be2: BinExport2 = fhi.ctx.be2
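
The x86 version compares `operands[0]` and `operands[1]` instead, because two-operand `xor reg, reg` is the idiomatic zeroing form, so the characteristic should only fire when the operands differ. The same idea as a toy check:

    def is_nzxor_intel(operands: list[str]) -> bool:
        # operands: [dst, src]
        return operands[0] != operands[1]

    assert not is_nzxor_intel(["eax", "eax"])  # xor eax, eax: zeroing idiom
    assert is_nzxor_intel(["eax", "ebx"])      # xor eax, ebx: non-zeroing
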
@@ -6,7 +6,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.

-from typing import List, Tuple, Iterator
+from typing import Iterator

 from capa.features.common import Feature, Characteristic
 from capa.features.address import Address, AbsoluteVirtualAddress
@@ -16,20 +16,20 @@ from capa.features.extractors.base_extractor import BBHandle, FunctionHandle
 from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2


-def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     bbi: BasicBlockContext = bbh.inner

     idx = fhi.ctx.idx

     basic_block_index: int = bbi.basic_block_index
-    target_edges: List[BinExport2.FlowGraph.Edge] = idx.target_edges_by_basic_block_index[basic_block_index]
+    target_edges: list[BinExport2.FlowGraph.Edge] = idx.target_edges_by_basic_block_index[basic_block_index]
     if basic_block_index in (e.source_basic_block_index for e in target_edges):
         basic_block_address: int = idx.get_basic_block_address(basic_block_index)
         yield Characteristic("tight loop"), AbsoluteVirtualAddress(basic_block_address)


-def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
     """extract basic block features"""
     for bb_handler in BASIC_BLOCK_HANDLERS:
         for feature, addr in bb_handler(fh, bbh):
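
The tight-loop test above is simply: does any edge that targets this basic block also originate from it? A standalone sketch over (source, target) index pairs:

    def is_tight_loop(block_index: int, edges: list[tuple[int, int]]) -> bool:
        # edges are (source_basic_block_index, target_basic_block_index) pairs
        return any(src == block_index for src, dst in edges if dst == block_index)

    assert is_tight_loop(3, [(2, 3), (3, 3)])      # self-edge: tight loop
    assert not is_tight_loop(3, [(2, 3), (3, 4)])  # no self-edge
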
@@ -6,7 +6,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import Set, List, Tuple, Iterator
+from typing import Iterator

 import capa.features.extractors.elf
 import capa.features.extractors.common
@@ -48,14 +48,14 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
         address_space: AddressSpace = AddressSpace.from_buf(buf, self.analysis.base_address)
         self.ctx: AnalysisContext = AnalysisContext(self.buf, self.be2, self.idx, self.analysis, address_space)

-        self.global_features: List[Tuple[Feature, Address]] = []
+        self.global_features: list[tuple[Feature, Address]] = []
         self.global_features.extend(list(capa.features.extractors.common.extract_format(self.buf)))
         self.global_features.extend(list(capa.features.extractors.common.extract_os(self.buf)))
         self.global_features.extend(list(capa.features.extractors.common.extract_arch(self.buf)))

-        self.format: Set[str] = set()
-        self.os: Set[str] = set()
-        self.arch: Set[str] = set()
+        self.format: set[str] = set()
+        self.os: set[str] = set()
+        self.arch: set[str] = set()

         for feature, _ in self.global_features:
             assert isinstance(feature.value, str)
@@ -72,10 +72,10 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
     def get_base_address(self) -> AbsoluteVirtualAddress:
         return AbsoluteVirtualAddress(self.analysis.base_address)

-    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
         yield from self.global_features

-    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.binexport2.file.extract_features(self.be2, self.buf)

     def get_functions(self) -> Iterator[FunctionHandle]:
@@ -97,7 +97,7 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
             inner=FunctionContext(self.ctx, flow_graph_index, self.format, self.os, self.arch),
         )

-    def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.binexport2.function.extract_features(fh)

     def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
@@ -112,7 +112,7 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):
             inner=BasicBlockContext(basic_block_index),
         )

-    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.binexport2.basicblock.extract_features(fh, bbh)

     def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
@@ -126,5 +126,5 @@ class BinExport2FeatureExtractor(StaticFeatureExtractor):

     def extract_insn_features(
         self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-    ) -> Iterator[Tuple[Feature, Address]]:
+    ) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.binexport2.insn.extract_features(fh, bbh, ih)
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.
 import io
 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 import pefile
 from elftools.elf.elffile import ELFFile
@@ -23,7 +23,7 @@ from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
 logger = logging.getLogger(__name__)


-def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
     if buf.startswith(capa.features.extractors.common.MATCH_PE):
         pe: pefile.PE = pefile.PE(data=buf)
         yield from capa.features.extractors.pefile.extract_file_export_names(pe)
@@ -34,7 +34,7 @@ def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Fe
         logger.warning("unsupported format")


-def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
     if buf.startswith(capa.features.extractors.common.MATCH_PE):
         pe: pefile.PE = pefile.PE(data=buf)
         yield from capa.features.extractors.pefile.extract_file_import_names(pe)
@@ -45,7 +45,7 @@ def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Fe
         logger.warning("unsupported format")


-def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
     if buf.startswith(capa.features.extractors.common.MATCH_PE):
         pe: pefile.PE = pefile.PE(data=buf)
         yield from capa.features.extractors.pefile.extract_file_section_names(pe)
@@ -56,15 +56,15 @@ def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[F
         logger.warning("unsupported format")


-def extract_file_strings(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_strings(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
     yield from capa.features.extractors.common.extract_file_strings(buf)


-def extract_file_format(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_format(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
     yield from capa.features.extractors.common.extract_format(buf)


-def extract_features(be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]:
     """extract file features"""
     for file_handler in FILE_HANDLERS:
         for feature, addr in file_handler(be2, buf):
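
These file handlers dispatch on the buffer's leading magic bytes rather than on the BinExport2 document itself. The dispatch in miniature, assuming the usual MZ and \x7fELF magics (the real code uses capa's MATCH_PE/MATCH_ELF constants):

    def sniff_format(buf: bytes) -> str:
        if buf.startswith(b"MZ"):
            return "pe"
        if buf.startswith(b"\x7fELF"):
            return "elf"
        return "unknown"

    assert sniff_format(b"MZ\x90\x00") == "pe"
    assert sniff_format(b"\x7fELF\x02\x01") == "elf"
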
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import List, Tuple, Iterator
+from typing import Iterator

 from capa.features.file import FunctionName
 from capa.features.common import Feature, Characteristic
@@ -16,7 +16,7 @@ from capa.features.extractors.base_extractor import FunctionHandle
 from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2


-def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_function_calls_to(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner

     be2: BinExport2 = fhi.ctx.be2
@@ -32,7 +32,7 @@ def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Feature, Add
         yield Characteristic("calls to"), AbsoluteVirtualAddress(caller_address)


-def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_function_loop(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner

     be2: BinExport2 = fhi.ctx.be2
@@ -40,7 +40,7 @@ def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address
     flow_graph_index: int = fhi.flow_graph_index
     flow_graph: BinExport2.FlowGraph = be2.flow_graph[flow_graph_index]

-    edges: List[Tuple[int, int]] = []
+    edges: list[tuple[int, int]] = []
     for edge in flow_graph.edge:
         edges.append((edge.source_basic_block_index, edge.target_basic_block_index))

@@ -48,7 +48,7 @@ def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address
         yield Characteristic("loop"), fh.address


-def extract_function_name(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_function_name(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner

     be2: BinExport2 = fhi.ctx.be2
@@ -63,7 +63,7 @@ def extract_function_name(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address
     yield FunctionName(vertex.mangled_name), fh.address


-def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
     for func_handler in FUNCTION_HANDLERS:
         for feature, addr in func_handler(fh):
             yield feature, addr
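
`extract_function_loop` collects the flow graph's (source, target) edge pairs and yields the "loop" characteristic when they form a cycle. A minimal DFS-based sketch of such a cycle test; capa itself delegates to a shared loop helper, which may differ in detail:

    def has_cycle(edges: list[tuple[int, int]]) -> bool:
        graph: dict[int, list[int]] = {}
        for src, dst in edges:
            graph.setdefault(src, []).append(dst)

        visiting: set[int] = set()  # nodes on the current DFS path
        done: set[int] = set()      # nodes fully explored

        def visit(node: int) -> bool:
            if node in visiting:
                return True  # back-edge found: cycle
            if node in done:
                return False
            visiting.add(node)
            for succ in graph.get(node, []):
                if visit(succ):
                    return True
            visiting.remove(node)
            done.add(node)
            return False

        return any(visit(n) for n in list(graph))

    assert has_cycle([(0, 1), (1, 0)])
    assert not has_cycle([(0, 1), (1, 2)])
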
@@ -6,7 +6,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import re
-from typing import Set, Dict, List, Tuple, Union, Iterator, Optional
+from typing import Union, Iterator, Optional
 from collections import defaultdict
 from dataclasses import dataclass

@@ -22,7 +22,7 @@ HAS_ARCH_INTEL = {ARCH_I386, ARCH_AMD64}
 HAS_ARCH_ARM = {ARCH_AARCH64}


-def mask_immediate(arch: Set[str], immediate: int) -> int:
+def mask_immediate(arch: set[str], immediate: int) -> int:
     if arch & HAS_ARCH64:
         immediate &= 0xFFFFFFFFFFFFFFFF
     elif arch & HAS_ARCH32:
@@ -30,7 +30,7 @@ def mask_immediate(arch: Set[str], immediate: int) -> int:
     return immediate


-def twos_complement(arch: Set[str], immediate: int, default: Optional[int] = None) -> int:
+def twos_complement(arch: set[str], immediate: int, default: Optional[int] = None) -> int:
     if default is not None:
         return capa.features.extractors.helpers.twos_complement(immediate, default)
     elif arch & HAS_ARCH64:
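
`mask_immediate` truncates an immediate to the register width implied by the architecture set. The same behavior in isolation, with illustrative architecture constants standing in for capa's:

    HAS_ARCH64 = {"amd64", "aarch64"}  # illustrative stand-ins
    HAS_ARCH32 = {"i386"}

    def mask_immediate(arch: set[str], immediate: int) -> int:
        if arch & HAS_ARCH64:
            return immediate & 0xFFFFFFFFFFFFFFFF
        if arch & HAS_ARCH32:
            return immediate & 0xFFFFFFFF
        return immediate

    assert mask_immediate({"i386"}, -1) == 0xFFFFFFFF   # two's-complement view of -1
    assert mask_immediate({"amd64"}, 1 << 70) == 0      # overflow bits truncated
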
@@ -50,17 +50,36 @@ def is_vertex_type(vertex: BinExport2.CallGraph.Vertex, type_: BinExport2.CallGr
     return vertex.HasField("type") and vertex.type == type_


+# internal to `build_expression_tree`
+# this is unstable: it is subject to change, so don't rely on it!
+def _prune_expression_tree_references_to_tree_index(
+    expression_tree: list[list[int]],
+    tree_index: int,
+):
+    # `i` is the index of the tree node that we'll search for `tree_index`
+    # if we remove `tree_index` from it, and it is now empty,
+    # then we'll need to prune references to `i`.
+    for i, tree_node in enumerate(expression_tree):
+        if tree_index in tree_node:
+            tree_node.remove(tree_index)
+
+            if len(tree_node) == 0:
+                # if the parent node is now empty,
+                # remove references to that parent node.
+                _prune_expression_tree_references_to_tree_index(expression_tree, i)
+
+
 # internal to `build_expression_tree`
 # this is unstable: it is subject to change, so don't rely on it!
 def _prune_expression_tree_empty_shifts(
     be2: BinExport2,
     operand: BinExport2.Operand,
-    expression_tree: List[List[int]],
+    expression_tree: list[list[int]],
     tree_index: int,
 ):
     expression_index = operand.expression_index[tree_index]
     expression = be2.expression[expression_index]
-    children_tree_indexes: List[int] = expression_tree[tree_index]
+    children_tree_indexes: list[int] = expression_tree[tree_index]

     if expression.type == BinExport2.Expression.OPERATOR:
         if len(children_tree_indexes) == 0 and expression.symbol in ("lsl", "lsr"):
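
The new `_prune_expression_tree_references_to_tree_index` helper introduced above removes a node from every parent's child list and recurses when a parent becomes empty, so whole dangling chains disappear in one call. A toy run of the same logic, standalone:

    def prune_references(tree: list[list[int]], tree_index: int) -> None:
        for i, node in enumerate(tree):
            if tree_index in node:
                node.remove(tree_index)
                if not node:
                    # the parent is now empty: prune references to it, too
                    prune_references(tree, i)

    # node 0 has children [1, 2]; node 1's only child is node 3.
    tree = [[1, 2], [3], [], []]
    prune_references(tree, 3)  # dropping node 3 empties node 1, which is pruned from node 0
    assert tree == [[2], [], [], []]
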
@@ -70,9 +89,7 @@ def _prune_expression_tree_empty_shifts(
             #
             # Which seems to be as if the shift wasn't there (shift of #0)
             # so we want to remove references to this node from any parent nodes.
-            for tree_node in expression_tree:
-                if tree_index in tree_node:
-                    tree_node.remove(tree_index)
+            _prune_expression_tree_references_to_tree_index(expression_tree, tree_index)

             return

@@ -82,38 +99,37 @@ def _prune_expression_tree_empty_shifts(

 # internal to `build_expression_tree`
 # this is unstable: it is subject to change, so don't rely on it!
-def _prune_expression_tree_empty_commas(
+def _fixup_expression_tree_references_to_tree_index(
+    expression_tree: list[list[int]],
+    existing_index: int,
+    new_index: int,
+):
+    for tree_node in expression_tree:
+        for i, index in enumerate(tree_node):
+            if index == existing_index:
+                tree_node[i] = new_index
+
+
+# internal to `build_expression_tree`
+# this is unstable: it is subject to change, so don't rely on it!
+def _fixup_expression_tree_lonely_commas(
     be2: BinExport2,
     operand: BinExport2.Operand,
-    expression_tree: List[List[int]],
+    expression_tree: list[list[int]],
     tree_index: int,
 ):
     expression_index = operand.expression_index[tree_index]
     expression = be2.expression[expression_index]
-    children_tree_indexes: List[int] = expression_tree[tree_index]
+    children_tree_indexes: list[int] = expression_tree[tree_index]

     if expression.type == BinExport2.Expression.OPERATOR:
         if len(children_tree_indexes) == 1 and expression.symbol == ",":
-            # Due to the above pruning of empty LSL or LSR expressions,
-            # the parents might need to be fixed up.
-            #
-            # Specifically, if the pruned node was part of a comma list with two children,
-            # now there's only a single child, which renders as an extra comma,
-            # so we replace references to the comma node with the immediate child.
-            #
-            # A more correct way of doing this might be to walk up the parents and do fixups,
-            # but I'm not quite sure how to do this yet. Just do two passes right now.
-            child = children_tree_indexes[0]
-
-            for tree_node in expression_tree:
-                tree_node.index
-                if tree_index in tree_node:
-                    tree_node[tree_node.index(tree_index)] = child
-
-            return
+            existing_index = tree_index
+            new_index = children_tree_indexes[0]
+            _fixup_expression_tree_references_to_tree_index(expression_tree, existing_index, new_index)

     for child_tree_index in children_tree_indexes:
-        _prune_expression_tree_empty_commas(be2, operand, expression_tree, child_tree_index)
+        _fixup_expression_tree_lonely_commas(be2, operand, expression_tree, child_tree_index)


 # internal to `build_expression_tree`
@@ -121,17 +137,17 @@ def _prune_expression_tree_empty_commas(
 def _prune_expression_tree(
     be2: BinExport2,
     operand: BinExport2.Operand,
-    expression_tree: List[List[int]],
+    expression_tree: list[list[int]],
 ):
     _prune_expression_tree_empty_shifts(be2, operand, expression_tree, 0)
-    _prune_expression_tree_empty_commas(be2, operand, expression_tree, 0)
+    _fixup_expression_tree_lonely_commas(be2, operand, expression_tree, 0)


 # this is unstable: it is subject to change, so don't rely on it!
 def _build_expression_tree(
     be2: BinExport2,
     operand: BinExport2.Operand,
-) -> List[List[int]]:
+) -> list[list[int]]:
     # The reconstructed expression tree layout, linking parent nodes to their children.
     #
     # There is one list of integers for each expression in the operand.
@@ -159,7 +175,7 @@ def _build_expression_tree(
         # exist (see https://github.com/NationalSecurityAgency/ghidra/issues/6817)
         return []

-    tree: List[List[int]] = []
+    tree: list[list[int]] = []
     for i, expression_index in enumerate(operand.expression_index):
         children = []

@@ -173,7 +189,6 @@ def _build_expression_tree(
         tree.append(children)

     _prune_expression_tree(be2, operand, tree)
-    _prune_expression_tree(be2, operand, tree)

     return tree

@@ -181,21 +196,34 @@ def _build_expression_tree(
 def _fill_operand_expression_list(
     be2: BinExport2,
     operand: BinExport2.Operand,
-    expression_tree: List[List[int]],
+    expression_tree: list[list[int]],
     tree_index: int,
-    expression_list: List[BinExport2.Expression],
+    expression_list: list[BinExport2.Expression],
 ):
     """
     Walk the given expression tree and collect the expression nodes in-order.
     """
     expression_index = operand.expression_index[tree_index]
     expression = be2.expression[expression_index]
-    children_tree_indexes: List[int] = expression_tree[tree_index]
+    children_tree_indexes: list[int] = expression_tree[tree_index]

     if expression.type == BinExport2.Expression.REGISTER:
-        assert len(children_tree_indexes) == 0
+        assert len(children_tree_indexes) <= 1
         expression_list.append(expression)
-        return
+        if len(children_tree_indexes) == 0:
+            return
+        elif len(children_tree_indexes) == 1:
+            # like for aarch64 with vector instructions, indicating vector data size:
+            #
+            #     FADD V0.4S, V1.4S, V2.4S
+            #
+            # see: https://github.com/mandiant/capa/issues/2528
+            child_index = children_tree_indexes[0]
+            _fill_operand_expression_list(be2, operand, expression_tree, child_index, expression_list)
+            return
+        else:
+            raise NotImplementedError(len(children_tree_indexes))

     elif expression.type == BinExport2.Expression.SYMBOL:
         assert len(children_tree_indexes) <= 1
@@ -218,9 +246,23 @@ def _fill_operand_expression_list(
         raise NotImplementedError(len(children_tree_indexes))

     elif expression.type == BinExport2.Expression.IMMEDIATE_INT:
-        assert len(children_tree_indexes) == 0
+        assert len(children_tree_indexes) <= 1
         expression_list.append(expression)
-        return
+        if len(children_tree_indexes) == 0:
+            return
+        elif len(children_tree_indexes) == 1:
+            # the ghidra exporter can produce some weird expressions,
+            # particularly for MSRs, like for:
+            #
+            #     sreg(3, 0, c.0, c.4, 4)
+            #
+            # see: https://github.com/mandiant/capa/issues/2530
+            child_index = children_tree_indexes[0]
+            _fill_operand_expression_list(be2, operand, expression_tree, child_index, expression_list)
+            return
+        else:
+            raise NotImplementedError(len(children_tree_indexes))

     elif expression.type == BinExport2.Expression.SIZE_PREFIX:
         # like: b4
@@ -282,10 +324,10 @@ def _fill_operand_expression_list(
         raise NotImplementedError(expression.type)


-def get_operand_expressions(be2: BinExport2, op: BinExport2.Operand) -> List[BinExport2.Expression]:
+def get_operand_expressions(be2: BinExport2, op: BinExport2.Operand) -> list[BinExport2.Expression]:
     tree = _build_expression_tree(be2, op)

-    expressions: List[BinExport2.Expression] = []
+    expressions: list[BinExport2.Expression] = []
     _fill_operand_expression_list(be2, op, tree, 0, expressions)

     return expressions
@@ -331,11 +373,11 @@ def get_instruction_mnemonic(be2: BinExport2, instruction: BinExport2.Instructio
     return be2.mnemonic[instruction.mnemonic_index].name.lower()


-def get_instruction_operands(be2: BinExport2, instruction: BinExport2.Instruction) -> List[BinExport2.Operand]:
+def get_instruction_operands(be2: BinExport2, instruction: BinExport2.Instruction) -> list[BinExport2.Operand]:
     return [be2.operand[operand_index] for operand_index in instruction.operand_index]


-def split_with_delimiters(s: str, delimiters: Tuple[str, ...]) -> Iterator[str]:
+def split_with_delimiters(s: str, delimiters: tuple[str, ...]) -> Iterator[str]:
     """
     Splits a string by any of the provided delimiter characters,
     including the delimiters in the results.
@@ -355,7 +397,7 @@ def split_with_delimiters(s: str, delimiters: Tuple[str, ...]) -> Iterator[str]:
     yield s[start:]

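`split_with_delimiters` tokenizes an operand string while keeping the delimiters in the output, which is useful when parsing operand patterns. A self-contained version consistent with the docstring and the `yield s[start:]` tail above (details of the real implementation may differ):

    from typing import Iterator

    def split_with_delimiters(s: str, delimiters: tuple[str, ...]) -> Iterator[str]:
        start = 0
        for i, char in enumerate(s):
            if char in delimiters:
                yield s[start:i]
                yield char
                start = i + 1
        yield s[start:]

    assert list(split_with_delimiters("rax+0x10", ("+",))) == ["rax", "+", "0x10"]
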
-BinExport2OperandPattern = Union[str, Tuple[str, ...]]
+BinExport2OperandPattern = Union[str, tuple[str, ...]]


 @dataclass
@@ -382,8 +424,8 @@ class BinExport2InstructionPattern:
     This matcher uses the BinExport2 data layout under the hood.
     """

-    mnemonics: Tuple[str, ...]
-    operands: Tuple[Union[str, BinExport2OperandPattern], ...]
+    mnemonics: tuple[str, ...]
+    operands: tuple[Union[str, BinExport2OperandPattern], ...]
     capture: Optional[str]

     @classmethod
@@ -438,7 +480,7 @@ class BinExport2InstructionPattern:
         mnemonic, _, rest = pattern.partition(" ")
         mnemonics = mnemonic.split("|")

-        operands: List[Union[str, Tuple[str, ...]]] = []
+        operands: list[Union[str, tuple[str, ...]]] = []
         while rest:
             rest = rest.strip()
             if not rest.startswith("["):
@@ -509,7 +551,7 @@ class BinExport2InstructionPattern:
         expression: BinExport2.Expression

     def match(
-        self, mnemonic: str, operand_expressions: List[List[BinExport2.Expression]]
+        self, mnemonic: str, operand_expressions: list[list[BinExport2.Expression]]
     ) -> Optional["BinExport2InstructionPattern.MatchResult"]:
         """
         Match the given BinExport2 data against this pattern.
@@ -602,10 +644,10 @@ class BinExport2InstructionPattern:
 class BinExport2InstructionPatternMatcher:
     """Index and match a collection of instruction patterns."""

-    def __init__(self, queries: List[BinExport2InstructionPattern]):
+    def __init__(self, queries: list[BinExport2InstructionPattern]):
         self.queries = queries
         # shard the patterns by (mnemonic, #operands)
-        self._index: Dict[Tuple[str, int], List[BinExport2InstructionPattern]] = defaultdict(list)
+        self._index: dict[tuple[str, int], list[BinExport2InstructionPattern]] = defaultdict(list)

         for query in queries:
             for mnemonic in query.mnemonics:
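
Sharding the patterns by (mnemonic, operand count) means a lookup only scans candidates of the right shape. The indexing idea in miniature, with strings standing in for pattern objects:

    from collections import defaultdict

    index: dict[tuple[str, int], list[str]] = defaultdict(list)

    patterns = [("xor", 2, "nzxor-candidate"), ("call", 1, "indirect-call")]
    for mnemonic, arity, name in patterns:
        index[(mnemonic, arity)].append(name)

    assert index[("xor", 2)] == ["nzxor-candidate"]
    assert index.get(("call", 2), []) == []  # wrong arity: no candidates scanned
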
@@ -623,7 +665,7 @@ class BinExport2InstructionPatternMatcher:
         )

     def match(
-        self, mnemonic: str, operand_expressions: List[List[BinExport2.Expression]]
+        self, mnemonic: str, operand_expressions: list[list[BinExport2.Expression]]
     ) -> Optional[BinExport2InstructionPattern.MatchResult]:
         queries = self._index.get((mnemonic.lower(), len(operand_expressions)), [])
         for query in queries:
@@ -6,7 +6,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import List, Tuple, Iterator
+from typing import Iterator

 import capa.features.extractors.helpers
 import capa.features.extractors.strings
@@ -32,7 +32,7 @@ from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
 logger = logging.getLogger(__name__)


-def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner

@@ -68,7 +68,7 @@ def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle

 def extract_insn_number_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner

     if fhi.arch & HAS_ARCH_INTEL:
@@ -77,7 +77,7 @@ def extract_insn_number_features(
         yield from capa.features.extractors.binexport2.arch.arm.insn.extract_insn_number_features(fh, bbh, ih)


-def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner

@@ -92,7 +92,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl
         # disassembler already identified string reference from instruction
         return

-    reference_addresses: List[int] = []
+    reference_addresses: list[int] = []

     if instruction_index in idx.data_reference_index_by_source_instruction_index:
         for data_reference_index in idx.data_reference_index_by_source_instruction_index[instruction_index]:
@@ -142,7 +142,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl

 def extract_insn_string_features(
     fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner

@@ -161,7 +161,7 @@ def extract_insn_string_features(

 def extract_insn_offset_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner

     if fhi.arch & HAS_ARCH_INTEL:
@@ -172,7 +172,7 @@ def extract_insn_offset_features(

 def extract_insn_nzxor_characteristic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner

     if fhi.arch & HAS_ARCH_INTEL:
@@ -187,7 +187,7 @@ def extract_insn_nzxor_characteristic_features(

 def extract_insn_mnemonic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner
     ii: InstructionContext = ih.inner

@@ -199,7 +199,7 @@ def extract_insn_mnemonic_features(
     yield Mnemonic(mnemonic_name), ih.address


-def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """extract functions calls from features

     most relevant at the function scope;
@@ -221,7 +221,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl

 def extract_function_indirect_call_characteristic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     fhi: FunctionContext = fh.inner

     if fhi.arch & HAS_ARCH_INTEL:
@@ -234,7 +234,7 @@ def extract_function_indirect_call_characteristic_features(
     )


-def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """extract instruction features"""
     for inst_handler in INSTRUCTION_HANDLERS:
         for feature, ea in inst_handler(f, bbh, insn):
@@ -5,115 +5,25 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
+from typing import Iterator

-import string
-from typing import Tuple, Iterator
-
-from binaryninja import Function
 from binaryninja import BasicBlock as BinjaBasicBlock
-from binaryninja import (
-    BinaryView,
-    SymbolType,
-    RegisterValueType,
-    VariableSourceType,
-    MediumLevelILOperation,
-    MediumLevelILBasicBlock,
-    MediumLevelILInstruction,
-)

 from capa.features.common import Feature, Characteristic
 from capa.features.address import Address
 from capa.features.basicblock import BasicBlock
-from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
 from capa.features.extractors.base_extractor import BBHandle, FunctionHandle


-def get_printable_len_ascii(s: bytes) -> int:
-    """Return string length if all operand bytes are ascii or utf16-le printable"""
-    count = 0
-    for c in s:
-        if c == 0:
-            return count
-        if c < 127 and chr(c) in string.printable:
-            count += 1
-    return count
-
-
-def get_printable_len_wide(s: bytes) -> int:
-    """Return string length if all operand bytes are ascii or utf16-le printable"""
-    if all(c == 0x00 for c in s[1::2]):
-        return get_printable_len_ascii(s[::2])
-    return 0
-
-
-def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int:
-    bv: BinaryView = f.view
-
-    if il.operation != MediumLevelILOperation.MLIL_CALL:
-        return 0
-
-    target = il.dest
-    if target.operation not in [MediumLevelILOperation.MLIL_CONST, MediumLevelILOperation.MLIL_CONST_PTR]:
-        return 0
-
-    addr = target.value.value
-    sym = bv.get_symbol_at(addr)
-    if not sym or sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.SymbolicFunctionSymbol]:
-        return 0
-
-    if sym.name not in ["__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"]:
-        return 0
-
-    if len(il.params) < 2:
-        return 0
-
-    dest = il.params[0]
-    if dest.operation in [MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR]:
-        var = dest.src
-    else:
-        return 0
-
-    if var.source_type != VariableSourceType.StackVariableSourceType:
-        return 0
-
-    src = il.params[1]
-    if src.value.type != RegisterValueType.ConstantDataAggregateValue:
-        return 0
-
-    s = f.get_constant_data(RegisterValueType.ConstantDataAggregateValue, src.value.value)
-    return max(get_printable_len_ascii(bytes(s)), get_printable_len_wide(bytes(s)))
-
-
-def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool:
-    """check basic block for stackstring indicators
-
-    true if basic block contains enough moves of constant bytes to the stack
-    """
-    count = 0
-    for il in bb:
-        count += get_stack_string_len(f, il)
-        if count > MIN_STACKSTRING_LEN:
-            return True
-
-    return False
-
-
-def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
-    """extract stackstring indicators from basic block"""
-    bb: Tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner
-    if bb[1] is not None and bb_contains_stackstring(fh.inner, bb[1]):
-        yield Characteristic("stack string"), bbh.address
-
-
-def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
     """extract tight loop indicators from a basic block"""
-    bb: Tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner
-    for edge in bb[0].outgoing_edges:
+    bb: BinjaBasicBlock = bbh.inner
+    for edge in bb.outgoing_edges:
if edge.target.start == bb[0].start:
|
if edge.target.start == bb.start:
|
||||||
yield Characteristic("tight loop"), bbh.address
|
yield Characteristic("tight loop"), bbh.address
|
||||||
|
|
||||||
|
|
||||||
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
|
def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
|
||||||
"""extract basic block features"""
|
"""extract basic block features"""
|
||||||
for bb_handler in BASIC_BLOCK_HANDLERS:
|
for bb_handler in BASIC_BLOCK_HANDLERS:
|
||||||
for feature, addr in bb_handler(fh, bbh):
|
for feature, addr in bb_handler(fh, bbh):
|
||||||
@@ -121,7 +31,4 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Featur
|
|||||||
yield BasicBlock(), bbh.address
|
yield BasicBlock(), bbh.address
|
||||||
|
|
||||||
|
|
||||||
BASIC_BLOCK_HANDLERS = (
|
BASIC_BLOCK_HANDLERS = (extract_bb_tight_loop,)
|
||||||
extract_bb_tight_loop,
|
|
||||||
extract_bb_stackstring,
|
|
||||||
)
|
|
||||||
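
The stackstring helpers removed here are not dropped; they reappear at function scope in function.py later in this compare. With the MLIL pairing gone, a BBHandle's inner value is the disassembly basic block itself. A hypothetical helper illustrating the new contract (the function name is an assumption for illustration):

from capa.features.extractors.base_extractor import BBHandle


def successor_starts(bbh: BBHandle) -> list[int]:
    # bbh.inner is now the bare Binary Ninja BasicBlock,
    # not a (BasicBlock, MediumLevelILBasicBlock) pair
    bb = bbh.inner
    return [edge.target.start for edge in bb.outgoing_edges]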

capa/features/extractors/binja/extractor.py
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import List, Tuple, Iterator
+from typing import Iterator

 import binaryninja as binja

@@ -30,7 +30,7 @@ class BinjaFeatureExtractor(StaticFeatureExtractor):
     def __init__(self, bv: binja.BinaryView):
         super().__init__(hashes=SampleHashes.from_bytes(bv.file.raw.read(0, bv.file.raw.length)))
         self.bv = bv
-        self.global_features: List[Tuple[Feature, Address]] = []
+        self.global_features: list[tuple[Feature, Address]] = []
         self.global_features.extend(capa.features.extractors.binja.file.extract_file_format(self.bv))
         self.global_features.extend(capa.features.extractors.binja.global_.extract_os(self.bv))
         self.global_features.extend(capa.features.extractors.binja.global_.extract_arch(self.bv))
@@ -48,31 +48,24 @@ class BinjaFeatureExtractor(StaticFeatureExtractor):
         for f in self.bv.functions:
             yield FunctionHandle(address=AbsoluteVirtualAddress(f.start), inner=f)

-    def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.binja.function.extract_features(fh)

     def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
         f: binja.Function = fh.inner
-        # Set up a MLIL basic block dict look up to associate the disassembly basic block with its MLIL basic block
-        mlil_lookup = {}
-        for mlil_bb in f.mlil.basic_blocks:
-            mlil_lookup[mlil_bb.source_block.start] = mlil_bb
-
         for bb in f.basic_blocks:
-            mlil_bb = mlil_lookup.get(bb.start)
-
-            yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=(bb, mlil_bb))
+            yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=bb)

-    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.binja.basicblock.extract_features(fh, bbh)

     def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
         import capa.features.extractors.binja.helpers as binja_helpers

-        bb: Tuple[binja.BasicBlock, binja.MediumLevelILBasicBlock] = bbh.inner
-        addr = bb[0].start
+        bb: binja.BasicBlock = bbh.inner
+        addr = bb.start

-        for text, length in bb[0]:
+        for text, length in bb:
             insn = binja_helpers.DisassemblyInstruction(addr, length, text)
             yield InsnHandle(address=AbsoluteVirtualAddress(addr), inner=insn)
             addr += length

capa/features/extractors/binja/file.py
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Tuple, Iterator
+from typing import Iterator

 from binaryninja import Segment, BinaryView, SymbolType, SymbolBinding

@@ -13,12 +13,22 @@ import capa.features.extractors.common
 import capa.features.extractors.helpers
 import capa.features.extractors.strings
 from capa.features.file import Export, Import, Section, FunctionName
-from capa.features.common import FORMAT_PE, FORMAT_ELF, Format, String, Feature, Characteristic
+from capa.features.common import (
+    FORMAT_PE,
+    FORMAT_ELF,
+    FORMAT_SC32,
+    FORMAT_SC64,
+    FORMAT_BINJA_DB,
+    Format,
+    String,
+    Feature,
+    Characteristic,
+)
 from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
 from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name


-def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[Feature, Address]]:
+def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature, Address]]:
     """check segment for embedded PE"""
     start = 0
     if bv.view_type == "PE" and seg.start == bv.start:
@@ -32,13 +42,13 @@ def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[Feature
             yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset)


-def extract_file_embedded_pe(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_embedded_pe(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
     """extract embedded PE features"""
     for seg in bv.segments:
         yield from check_segment_for_pe(bv, seg)


-def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_export_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
     """extract function exports"""
     for sym in bv.get_symbols_of_type(SymbolType.FunctionSymbol) + bv.get_symbols_of_type(SymbolType.DataSymbol):
         if sym.binding in [SymbolBinding.GlobalBinding, SymbolBinding.WeakBinding]:
@@ -72,7 +82,7 @@ def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address
             yield Characteristic("forwarded export"), AbsoluteVirtualAddress(sym.address)


-def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_import_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
     """extract function imports

     1. imports by ordinal:
@@ -96,19 +106,19 @@ def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address
             yield Import(name), addr


-def extract_file_section_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_section_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
     """extract section names"""
     for name, section in bv.sections.items():
         yield Section(name), AbsoluteVirtualAddress(section.start)


-def extract_file_strings(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_strings(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
     """extract ASCII and UTF-16 LE strings"""
     for s in bv.strings:
         yield String(s.value), FileOffsetAddress(s.start)


-def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_function_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
     """
     extract the names of statically-linked library functions.
     """
@@ -127,12 +137,22 @@ def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Addre
             yield FunctionName(name[1:]), sym.address


-def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_format(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
+    if bv.file.database is not None:
+        yield Format(FORMAT_BINJA_DB), NO_ADDRESS
+
     view_type = bv.view_type
     if view_type in ["PE", "COFF"]:
         yield Format(FORMAT_PE), NO_ADDRESS
     elif view_type == "ELF":
         yield Format(FORMAT_ELF), NO_ADDRESS
+    elif view_type == "Mapped":
+        if bv.arch.name == "x86":
+            yield Format(FORMAT_SC32), NO_ADDRESS
+        elif bv.arch.name == "x86_64":
+            yield Format(FORMAT_SC64), NO_ADDRESS
+        else:
+            raise NotImplementedError(f"unexpected raw file with arch: {bv.arch}")
     elif view_type == "Raw":
         # no file type to return when processing a binary file, but we want to continue processing
         return
@@ -140,7 +160,7 @@ def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
     raise NotImplementedError(f"unexpected file format: {view_type}")


-def extract_features(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
     """extract file features"""
     for file_handler in FILE_HANDLERS:
         for feature, addr in file_handler(bv):
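
A hedged sketch of what the extended format detection yields, assuming `bv` is an already-open BinaryView and using the `extract_file_format` above:

for feature, addr in extract_file_format(bv):
    # a view backed by a .bndb database now also yields Format(FORMAT_BINJA_DB);
    # a raw "Mapped" x86 / x86_64 view yields Format(FORMAT_SC32) / Format(FORMAT_SC64)
    print(feature, addr)  # addr is NO_ADDRESS for file-format features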

@@ -105,13 +105,13 @@ def find_binaryninja() -> Optional[Path]:
         logger.debug("detected OS: linux")
     elif sys.platform == "darwin":
         logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
-        return False
+        return None
     elif sys.platform == "win32":
         logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
-        return False
+        return None
     else:
         logger.warning("unsupported platform to find Binary Ninja: %s", sys.platform)
-        return False
+        return None

     desktop_entry = get_desktop_entry("com.vector35.binaryninja.desktop")
     if not desktop_entry:
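
The `return False` fixes above matter because callers treat the result as `Optional[Path]`; a small usage sketch:

path = find_binaryninja()
if path is None:
    # the unsupported-platform branches previously returned False,
    # which is falsy but not a valid Optional[Path] value
    print("Binary Ninja installation not found")
else:
    print(f"using Binary Ninja at {path}")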

capa/features/extractors/binja/function.py
@@ -5,14 +5,28 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Tuple, Iterator
+import string
+from typing import Iterator

-from binaryninja import Function, BinaryView, SymbolType, RegisterValueType, LowLevelILOperation
+from binaryninja import (
+    Function,
+    BinaryView,
+    SymbolType,
+    ILException,
+    RegisterValueType,
+    VariableSourceType,
+    LowLevelILOperation,
+    MediumLevelILOperation,
+    MediumLevelILBasicBlock,
+    MediumLevelILInstruction,
+)

 from capa.features.file import FunctionName
 from capa.features.common import Feature, Characteristic
 from capa.features.address import Address, AbsoluteVirtualAddress
 from capa.features.extractors import loops
+from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
+from capa.features.extractors.binja.helpers import get_llil_instr_at_addr
 from capa.features.extractors.base_extractor import FunctionHandle


@@ -24,7 +38,7 @@ def extract_function_calls_to(fh: FunctionHandle):
         # Everything that is a code reference to the current function is considered a caller, which actually includes
         # many other references that are NOT a caller. For example, an instruction `push function_start` will also be
         # considered a caller to the function
-        llil = caller.llil
+        llil = get_llil_instr_at_addr(func.view, caller.address)
         if (llil is None) or llil.operation not in [
             LowLevelILOperation.LLIL_CALL,
             LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
@@ -33,14 +47,13 @@ def extract_function_calls_to(fh: FunctionHandle):
         ]:
             continue

-        if llil.dest.value.type not in [
-            RegisterValueType.ImportedAddressValue,
-            RegisterValueType.ConstantValue,
-            RegisterValueType.ConstantPointerValue,
+        if llil.dest.operation not in [
+            LowLevelILOperation.LLIL_CONST,
+            LowLevelILOperation.LLIL_CONST_PTR,
         ]:
             continue

-        address = llil.dest.value.value
+        address = llil.dest.constant
         if address != func.start:
             continue

@@ -95,10 +108,103 @@ def extract_function_name(fh: FunctionHandle):
         yield FunctionName(name[1:]), sym.address


-def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def get_printable_len_ascii(s: bytes) -> int:
+    """Return string length if all operand bytes are ascii or utf16-le printable"""
+    count = 0
+    for c in s:
+        if c == 0:
+            return count
+        if c < 127 and chr(c) in string.printable:
+            count += 1
+    return count
+
+
+def get_printable_len_wide(s: bytes) -> int:
+    """Return string length if all operand bytes are ascii or utf16-le printable"""
+    if all(c == 0x00 for c in s[1::2]):
+        return get_printable_len_ascii(s[::2])
+    return 0
+
+
+def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int:
+    bv: BinaryView = f.view
+
+    if il.operation != MediumLevelILOperation.MLIL_CALL:
+        return 0
+
+    target = il.dest
+    if target.operation not in [MediumLevelILOperation.MLIL_CONST, MediumLevelILOperation.MLIL_CONST_PTR]:
+        return 0
+
+    addr = target.value.value
+    sym = bv.get_symbol_at(addr)
+    if not sym or sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.SymbolicFunctionSymbol]:
+        return 0
+
+    if sym.name not in ["__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"]:
+        return 0
+
+    if len(il.params) < 2:
+        return 0
+
+    dest = il.params[0]
+    if dest.operation in [MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR]:
+        var = dest.src
+    else:
+        return 0
+
+    if var.source_type != VariableSourceType.StackVariableSourceType:
+        return 0
+
+    src = il.params[1]
+    if src.value.type != RegisterValueType.ConstantDataAggregateValue:
+        return 0
+
+    s = f.get_constant_data(RegisterValueType.ConstantDataAggregateValue, src.value.value)
+    return max(get_printable_len_ascii(bytes(s)), get_printable_len_wide(bytes(s)))
+
+
+def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool:
+    """check basic block for stackstring indicators
+
+    true if basic block contains enough moves of constant bytes to the stack
+    """
+    count = 0
+    for il in bb:
+        count += get_stack_string_len(f, il)
+        if count > MIN_STACKSTRING_LEN:
+            return True
+
+    return False
+
+
+def extract_stackstring(fh: FunctionHandle):
+    """extract stackstring indicators"""
+    func: Function = fh.inner
+    bv: BinaryView = func.view
+    if bv is None:
+        return
+
+    try:
+        mlil = func.mlil
+    except ILException:
+        return
+
+    for block in mlil.basic_blocks:
+        if bb_contains_stackstring(func, block):
+            yield Characteristic("stack string"), block.source_block.start
+
+
+def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
     for func_handler in FUNCTION_HANDLERS:
         for feature, addr in func_handler(fh):
             yield feature, addr


-FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call, extract_function_name)
+FUNCTION_HANDLERS = (
+    extract_function_calls_to,
+    extract_function_loop,
+    extract_recursive_call,
+    extract_function_name,
+    extract_stackstring,
+)
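
A tiny worked example of the printable-length helpers added above, with values following directly from their definitions:

assert get_printable_len_ascii(b"ABC\x00xyz") == 3      # counting stops at the NUL
assert get_printable_len_wide(b"A\x00B\x00C\x00") == 3  # UTF-16LE "ABC"
assert get_printable_len_wide(b"ABCD") == 0             # odd bytes are not all NUL, so not wide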

capa/features/extractors/binja/global_.py
@@ -6,7 +6,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 from binaryninja import BinaryView

@@ -16,7 +16,7 @@ from capa.features.address import NO_ADDRESS, Address
 logger = logging.getLogger(__name__)


-def extract_os(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_os(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
     name = bv.platform.name
     if "-" in name:
         name = name.split("-")[0]
@@ -45,7 +45,7 @@ def extract_os(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
         return


-def extract_arch(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
+def extract_arch(bv: BinaryView) -> Iterator[tuple[Feature, Address]]:
     arch = bv.arch.name
     if arch == "x86_64":
         yield Arch(ARCH_AMD64), NO_ADDRESS

capa/features/extractors/binja/helpers.py
@@ -6,10 +6,10 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import re
-from typing import List, Callable
+from typing import Callable, Optional
 from dataclasses import dataclass

-from binaryninja import BinaryView, LowLevelILInstruction
+from binaryninja import BinaryView, LowLevelILFunction, LowLevelILInstruction
 from binaryninja.architecture import InstructionTextToken


@@ -17,7 +17,7 @@ from binaryninja.architecture import InstructionTextToken
 class DisassemblyInstruction:
     address: int
     length: int
-    text: List[InstructionTextToken]
+    text: list[InstructionTextToken]


 LLIL_VISITOR = Callable[[LowLevelILInstruction, LowLevelILInstruction, int], bool]
@@ -54,7 +54,7 @@ def unmangle_c_name(name: str) -> str:


 def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str:
-    s: List[str] = []
+    s: list[str] = []
     while len(s) < max_len:
         try:
             c = bv.read(offset + len(s), 1)[0]
@@ -67,3 +67,13 @@ def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str:
         s.append(chr(c))

     return "".join(s)
+
+
+def get_llil_instr_at_addr(bv: BinaryView, addr: int) -> Optional[LowLevelILInstruction]:
+    arch = bv.arch
+    buffer = bv.read(addr, arch.max_instr_length)
+    llil = LowLevelILFunction(arch=arch)
+    llil.current_address = addr
+    if arch.get_instruction_low_level_il(buffer, addr, llil) == 0:
+        return None
+    return llil[0]
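
A hedged usage sketch of the new helper: it lifts a single instruction on demand, without requiring the view's completed LLIL analysis. `binaryninja.load` and the sample path are assumptions, not part of this change:

import binaryninja

bv = binaryninja.load("/tmp/sample.exe")  # hypothetical input
for func in bv.functions:
    # lift just the first instruction of each function; unlike func.llil,
    # this does not depend on full core analysis of the function
    llil = get_llil_instr_at_addr(bv, func.start)
    if llil is not None:
        print(hex(func.start), llil.operation.name)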

capa/features/extractors/binja/insn.py
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Any, List, Tuple, Iterator, Optional
+from typing import Any, Iterator, Optional

 from binaryninja import Function
 from binaryninja import BasicBlock as BinjaBasicBlock
@@ -23,7 +23,7 @@ import capa.features.extractors.helpers
 from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
 from capa.features.common import MAX_BYTES_FEATURE_SIZE, Bytes, String, Feature, Characteristic
 from capa.features.address import Address, AbsoluteVirtualAddress
-from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs
+from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs, get_llil_instr_at_addr
 from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle

 # security cookie checks may perform non-zeroing XORs, these are expected within a certain
@@ -36,35 +36,27 @@ SECURITY_COOKIE_BYTES_DELTA = 0x40
 # 2. The function must only make one call/jump to another address
 # If the function being checked is a stub function, returns the target address. Otherwise, return None.
 def is_stub_function(bv: BinaryView, addr: int) -> Optional[int]:
-    funcs = bv.get_functions_at(addr)
-    for func in funcs:
-        if len(func.basic_blocks) != 1:
-            continue
+    llil = get_llil_instr_at_addr(bv, addr)
+    if llil is None or llil.operation not in [
+        LowLevelILOperation.LLIL_CALL,
+        LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
+        LowLevelILOperation.LLIL_JUMP,
+        LowLevelILOperation.LLIL_TAILCALL,
+    ]:
+        return None

-        call_count = 0
-        call_target = None
-        for il in func.llil.instructions:
-            if il.operation in [
-                LowLevelILOperation.LLIL_CALL,
-                LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
-                LowLevelILOperation.LLIL_JUMP,
-                LowLevelILOperation.LLIL_TAILCALL,
-            ]:
-                call_count += 1
-                if il.dest.value.type in [
-                    RegisterValueType.ImportedAddressValue,
-                    RegisterValueType.ConstantValue,
-                    RegisterValueType.ConstantPointerValue,
-                ]:
-                    call_target = il.dest.value.value
+    # The LLIL instruction retrieved by `get_llil_instr_at_addr` did not go through a full analysis, so we cannot check
+    # `llil.dest.value.type` here
+    if llil.dest.operation not in [
+        LowLevelILOperation.LLIL_CONST,
+        LowLevelILOperation.LLIL_CONST_PTR,
+    ]:
+        return None

-        if call_count == 1 and call_target is not None:
-            return call_target
-
-    return None
+    return llil.dest.constant


-def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """
     parse instruction API features

@@ -123,7 +115,7 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)

 def extract_insn_number_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """
     parse instruction number features
     example:
@@ -131,7 +123,7 @@ def extract_insn_number_features(
     """
     func: Function = fh.inner

-    results: List[Tuple[Any[Number, OperandNumber], Address]] = []
+    results: list[tuple[Any[Number, OperandNumber], Address]] = []

     def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
         if il.operation == LowLevelILOperation.LLIL_LOAD:
@@ -162,7 +154,7 @@ def extract_insn_number_features(
     yield from results


-def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """
     parse referenced byte sequences
     example:
@@ -209,7 +201,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl

 def extract_insn_string_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """
     parse instruction string features

@@ -266,7 +258,7 @@ def extract_insn_string_features(

 def extract_insn_offset_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """
     parse instruction structure offset features

@@ -275,7 +267,7 @@ def extract_insn_offset_features(
     """
     func: Function = fh.inner

-    results: List[Tuple[Any[Offset, OperandOffset], Address]] = []
+    results: list[tuple[Any[Offset, OperandOffset], Address]] = []
     address_size = func.view.arch.address_size * 8

     def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool:
@@ -353,7 +345,7 @@ def is_nzxor_stack_cookie(f: Function, bb: BinjaBasicBlock, llil: LowLevelILInst

 def extract_insn_nzxor_characteristic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """
     parse instruction non-zeroing XOR instruction
     ignore expected non-zeroing XORs, e.g. security cookies
@@ -367,7 +359,7 @@ def extract_insn_nzxor_characteristic_features(
         # e.g., <llil: eax = 0>, (LLIL_SET_REG). So we do not need to check whether the two operands are the same.
         if il.operation == LowLevelILOperation.LLIL_XOR:
             # Exclude cases related to the stack cookie
-            if is_nzxor_stack_cookie(fh.inner, bbh.inner[0], il):
+            if is_nzxor_stack_cookie(fh.inner, bbh.inner, il):
                 return False
             results.append((Characteristic("nzxor"), ih.address))
             return False
@@ -382,7 +374,7 @@ def extract_insn_nzxor_characteristic_features(

 def extract_insn_mnemonic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """parse instruction mnemonic features"""
     insn: DisassemblyInstruction = ih.inner
     yield Mnemonic(insn.text[0].text), ih.address
@@ -390,7 +382,7 @@ def extract_insn_mnemonic_features(

 def extract_insn_obfs_call_plus_5_characteristic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """
     parse call $+5 instruction from the given instruction.
     """
@@ -401,7 +393,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(

 def extract_insn_peb_access_characteristic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """parse instruction peb access

     fs:[0x30] on x86, gs:[0x60] on x64
@@ -444,7 +436,7 @@ def extract_insn_peb_access_characteristic_features(

 def extract_insn_segment_access_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """parse instruction fs or gs access"""
     func: Function = fh.inner

@@ -471,7 +463,7 @@ def extract_insn_segment_access_features(

 def extract_insn_cross_section_cflow(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """inspect the instruction for a CALL or JMP that crosses section boundaries"""
     func: Function = fh.inner
     bv: BinaryView = func.view
@@ -491,7 +483,7 @@ def extract_insn_cross_section_cflow(
     yield Characteristic("cross section flow"), ih.address


-def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """extract functions calls from features

     most relevant at the function scope, however, its most efficient to extract at the instruction scope
@@ -534,7 +526,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl

 def extract_function_indirect_call_characteristic_features(
     fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
     does not include calls like => call ds:dword_ABD4974

@@ -562,7 +554,7 @@ def extract_function_indirect_call_characteristic_features(
     yield Characteristic("indirect call"), ih.address


-def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """extract instruction features"""
     for inst_handler in INSTRUCTION_HANDLERS:
         for feature, ea in inst_handler(f, bbh, insn):
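
A short sketch of how the rewritten `is_stub_function` might be probed, assuming `bv` is an open BinaryView as in the helpers example earlier:

for func in bv.functions:
    target = is_stub_function(bv, func.start)
    if target is not None:
        # the first instruction is a call/jump to a constant destination,
        # so the function is treated as a thin forwarding stub
        print(f"stub at {func.start:#x} forwarding to {target:#x}")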

capa/features/extractors/cape/call.py
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 import capa.features.extractors.helpers
 from capa.helpers import assert_never
@@ -20,7 +20,7 @@ from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, Pr
 logger = logging.getLogger(__name__)


-def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
     """
     this method extracts the given call's features (such as API name and arguments),
     and returns them as API, Number, and String features.
@@ -55,7 +55,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
     yield API(name), ch.address


-def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
     for handler in CALL_HANDLERS:
         for feature, addr in handler(ph, th, ch):
             yield feature, addr

capa/features/extractors/cape/extractor.py
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Dict, Tuple, Union, Iterator
+from typing import Union, Iterator

 import capa.features.extractors.cape.call
 import capa.features.extractors.cape.file
@@ -50,16 +50,16 @@ class CapeExtractor(DynamicFeatureExtractor):
         assert self.report.static is not None and self.report.static.pe is not None
         return AbsoluteVirtualAddress(self.report.static.pe.imagebase)

-    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
         yield from self.global_features

-    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.cape.file.extract_features(self.report)

     def get_processes(self) -> Iterator[ProcessHandle]:
         yield from capa.features.extractors.cape.file.get_processes(self.report)

-    def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.cape.process.extract_features(ph)

     def get_process_name(self, ph) -> str:
@@ -69,7 +69,7 @@ class CapeExtractor(DynamicFeatureExtractor):
     def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
         yield from capa.features.extractors.cape.process.get_threads(ph)

-    def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
         if False:
             # force this routine to be a generator,
             # but we don't actually have any elements to generate.
@@ -81,7 +81,7 @@ class CapeExtractor(DynamicFeatureExtractor):

     def extract_call_features(
         self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
-    ) -> Iterator[Tuple[Feature, Address]]:
+    ) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.cape.call.extract_features(ph, th, ch)

     def get_call_name(self, ph, th, ch) -> str:
@@ -122,7 +122,7 @@ class CapeExtractor(DynamicFeatureExtractor):
         return "".join(parts)

     @classmethod
-    def from_report(cls, report: Dict) -> "CapeExtractor":
+    def from_report(cls, report: dict) -> "CapeExtractor":
         cr = CapeReport.model_validate(report)

         if cr.info.version not in TESTED_VERSIONS:
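
A hedged usage sketch for the retyped `from_report`; the report path is a hypothetical example, not from the repo:

import json
from pathlib import Path

from capa.features.extractors.cape.extractor import CapeExtractor

report = json.loads(Path("report.json").read_text(encoding="utf-8"))
extractor = CapeExtractor.from_report(report)  # report is a plain dict parsed from JSON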

capa/features/extractors/cape/file.py
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 from capa.features.file import Export, Import, Section
 from capa.features.common import String, Feature
@@ -41,7 +41,7 @@ def get_processes(report: CapeReport) -> Iterator[ProcessHandle]:
             seen_processes[addr].append(process)


-def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_import_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     """
     extract imported function names
     """
@@ -62,57 +62,57 @@ def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]
             yield Import(name), AbsoluteVirtualAddress(function.address)


-def extract_export_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_export_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     assert report.static is not None and report.static.pe is not None
     for function in report.static.pe.exports:
         yield Export(function.name), AbsoluteVirtualAddress(function.address)


-def extract_section_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_section_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     assert report.static is not None and report.static.pe is not None
     for section in report.static.pe.sections:
         yield Section(section.name), AbsoluteVirtualAddress(section.virtual_address)


-def extract_file_strings(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_strings(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     if report.strings is not None:
         for string in report.strings:
             yield String(string), NO_ADDRESS


-def extract_used_regkeys(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_used_regkeys(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     for regkey in report.behavior.summary.keys:
         yield String(regkey), NO_ADDRESS


-def extract_used_files(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_used_files(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     for file in report.behavior.summary.files:
         yield String(file), NO_ADDRESS


-def extract_used_mutexes(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_used_mutexes(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     for mutex in report.behavior.summary.mutexes:
         yield String(mutex), NO_ADDRESS


-def extract_used_commands(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_used_commands(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     for cmd in report.behavior.summary.executed_commands:
         yield String(cmd), NO_ADDRESS


-def extract_used_apis(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_used_apis(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     for symbol in report.behavior.summary.resolved_apis:
         yield String(symbol), NO_ADDRESS


-def extract_used_services(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_used_services(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     for svc in report.behavior.summary.created_services:
         yield String(svc), NO_ADDRESS
     for svc in report.behavior.summary.started_services:
         yield String(svc), NO_ADDRESS


-def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     for handler in FILE_HANDLERS:
         for feature, addr in handler(report):
             yield feature, addr
|
|||||||
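The pattern above repeats throughout the change: the deprecated `typing.Tuple` alias is replaced by the builtin `tuple`, which PEP 585 made subscriptable at runtime in Python 3.9. A minimal, self-contained sketch of the before/after shape, using hypothetical stand-ins rather than capa's real Feature/Address classes:

# sketch of the PEP 585 annotation change; Feature/Address below are
# hypothetical stand-ins, not capa's actual classes.
from typing import Iterator


class Feature:  # stand-in
    def __init__(self, value: str):
        self.value = value


Address = int  # stand-in

# before: from typing import Tuple; ... -> Iterator[Tuple[Feature, Address]]
# after (Python 3.9+): the builtin tuple is generic, no typing import needed
def extract_strings(strings: list[str]) -> Iterator[tuple[Feature, Address]]:
    for i, s in enumerate(strings):
        yield Feature(s), Address(i)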
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 from capa.features.common import (
     OS,
@@ -28,7 +28,7 @@ from capa.features.extractors.cape.models import CapeReport
 logger = logging.getLogger(__name__)


-def extract_arch(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_arch(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     if "Intel 80386" in report.target.file.type:
         yield Arch(ARCH_I386), NO_ADDRESS
     elif "x86-64" in report.target.file.type:
@@ -40,7 +40,7 @@ def extract_arch(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
     )


-def extract_format(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_format(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     if "PE" in report.target.file.type:
         yield Format(FORMAT_PE), NO_ADDRESS
     elif "ELF" in report.target.file.type:
@@ -52,7 +52,7 @@ def extract_format(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
     )


-def extract_os(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_os(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     # this variable contains the output of the file command
     file_output = report.target.file.type

@@ -80,7 +80,7 @@ def extract_os(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
     yield OS(OS_ANY), NO_ADDRESS


-def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(report: CapeReport) -> Iterator[tuple[Feature, Address]]:
     for global_handler in GLOBAL_HANDLER:
         for feature, addr in global_handler(report):
             yield feature, addr
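Each extractor module touched here follows the same dispatch shape: a module-level tuple of handler callables, each yielding (feature, address) pairs, which a top-level extract_features flattens into one stream. A generic sketch of that pattern (the handler names and report shape are illustrative, not capa's actual GLOBAL_HANDLER entries):

# illustrative sketch of the handler-dispatch pattern used by these modules;
# handler names and the dict-based "report" are hypothetical.
from typing import Callable, Iterator

Feature = str   # stand-in
Address = int   # stand-in
Extraction = tuple[Feature, Address]


def extract_arch(report: dict) -> Iterator[Extraction]:
    yield "arch/" + report.get("arch", "any"), 0


def extract_os(report: dict) -> Iterator[Extraction]:
    yield "os/" + report.get("os", "any"), 0


HANDLERS: tuple[Callable[[dict], Iterator[Extraction]], ...] = (extract_arch, extract_os)


def extract_features(report: dict) -> Iterator[Extraction]:
    # flatten every handler's results into one stream
    for handler in HANDLERS:
        yield from handler(report)


print(list(extract_features({"arch": "i386", "os": "windows"})))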
@@ -6,12 +6,12 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.

-from typing import Any, Dict, List
+from typing import Any

 from capa.features.extractors.base_extractor import ProcessHandle


-def find_process(processes: List[Dict[str, Any]], ph: ProcessHandle) -> Dict[str, Any]:
+def find_process(processes: list[dict[str, Any]], ph: ProcessHandle) -> dict[str, Any]:
     """
     find a specific process identified by a process handler.
     """
@@ -6,10 +6,9 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import binascii
-from typing import Any, Dict, List, Union, Literal, Optional
+from typing import Any, Union, Literal, Optional, Annotated, TypeAlias

 from pydantic import Field, BaseModel, ConfigDict
-from typing_extensions import Annotated, TypeAlias
 from pydantic.functional_validators import BeforeValidator


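Dropping the typing_extensions import is only safe on newer interpreters: typing.Annotated exists since Python 3.9 and typing.TypeAlias since 3.10, so this hunk implies a Python 3.10+ floor. A sketch of the alias-plus-validator pattern these imports serve; HexInt below is a hedged reconstruction of the idea, not a copy of capa's definition:

# requires Python 3.10+ (typing.TypeAlias); HexInt is a hedged reconstruction.
from typing import Annotated, TypeAlias

from pydantic import BaseModel
from pydantic.functional_validators import BeforeValidator


def validate_hex_int(value: int | str) -> int:
    # CAPE reports emit some numbers as hex strings; normalize them to int.
    return int(value, 16) if isinstance(value, str) else value


HexInt: TypeAlias = Annotated[int, BeforeValidator(validate_hex_int)]


class Call(BaseModel):
    return_value: HexInt


print(Call(return_value="0x1f").return_value)  # 31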
@@ -59,11 +58,11 @@ Skip: TypeAlias = Optional[Any]
 # in a field with this type.
 # then we can update the model with the discovered shape.
 TODO: TypeAlias = None
-ListTODO: TypeAlias = List[None]
+ListTODO: TypeAlias = list[None]
 DictTODO: TypeAlias = ExactModel

-EmptyDict: TypeAlias = BaseModel
-EmptyList: TypeAlias = List[Any]
+Emptydict: TypeAlias = BaseModel
+EmptyList: TypeAlias = list[Any]


 class Info(FlexibleModel):
@@ -77,7 +76,7 @@ class ImportedSymbol(ExactModel):

 class ImportedDll(ExactModel):
     dll: str
-    imports: List[ImportedSymbol]
+    imports: list[ImportedSymbol]


 class DirectoryEntry(ExactModel):
@@ -149,7 +148,7 @@ class Signer(ExactModel):
     aux_valid: Optional[bool] = None
     aux_error: Optional[bool] = None
     aux_error_desc: Optional[str] = None
-    aux_signers: Optional[List[AuxSigner]] = None
+    aux_signers: Optional[list[AuxSigner]] = None


 class Overlay(ExactModel):
@@ -178,22 +177,22 @@ class PE(ExactModel):
     pdbpath: Optional[str] = None
     timestamp: str

-    # List[ImportedDll], or Dict[basename(dll), ImportedDll]
-    imports: Union[List[ImportedDll], Dict[str, ImportedDll]]
+    # list[ImportedDll], or dict[basename(dll), ImportedDll]
+    imports: Union[list[ImportedDll], dict[str, ImportedDll]]
     imported_dll_count: Optional[int] = None
     imphash: str

     exported_dll_name: Optional[str] = None
-    exports: List[ExportedSymbol]
+    exports: list[ExportedSymbol]

-    dirents: List[DirectoryEntry]
-    sections: List[Section]
+    dirents: list[DirectoryEntry]
+    sections: list[Section]

     ep_bytes: Optional[HexBytes] = None

     overlay: Optional[Overlay] = None
-    resources: List[Resource]
-    versioninfo: List[KV]
+    resources: list[Resource]
+    versioninfo: list[KV]

     # base64 encoded data
     icon: Optional[str] = None
@@ -204,7 +203,7 @@ class PE(ExactModel):
     # short hex string
     icon_dhash: Optional[str] = None

-    digital_signers: List[DigitalSigner]
+    digital_signers: list[DigitalSigner]
     guest_signers: Signer


@@ -217,9 +216,9 @@ class File(FlexibleModel):
     cape_type: Optional[str] = None

     pid: Optional[Union[int, Literal[""]]] = None
-    name: Union[List[str], str]
+    name: Union[list[str], str]
     path: str
-    guest_paths: Union[List[str], str, None]
+    guest_paths: Union[list[str], str, None]
     timestamp: Optional[str] = None

     #
@@ -244,7 +243,7 @@ class File(FlexibleModel):
     ep_bytes: Optional[HexBytes] = None
     entrypoint: Optional[int] = None
     data: Optional[str] = None
-    strings: Optional[List[str]] = None
+    strings: Optional[list[str]] = None

     #
     # detections (skip)
@@ -283,7 +282,7 @@ class Call(ExactModel):

     api: str

-    arguments: List[Argument]
+    arguments: list[Argument]
     status: bool
     return_: HexInt = Field(alias="return")
     pretty_return: Optional[str] = None
@@ -298,15 +297,18 @@ class Call(ExactModel):
     id: int


-class Process(ExactModel):
+# FlexibleModel to account for extended fields
+# refs: https://github.com/mandiant/capa/issues/2466
+# https://github.com/kevoreilly/CAPEv2/pull/2199
+class Process(FlexibleModel):
     process_id: int
     process_name: str
     parent_id: int
     module_path: str
     first_seen: str
-    calls: List[Call]
-    threads: List[int]
-    environ: Dict[str, str]
+    calls: list[Call]
+    threads: list[int]
+    environ: dict[str, str]


 class ProcessTree(ExactModel):
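The Process change above is behavioral, not cosmetic: rebasing from ExactModel to FlexibleModel lets newer CAPE builds add fields to a process record without breaking parsing. In pydantic v2 the two bases differ only in their extra-field policy; a sketch of that split (the two base-class bodies are a reconstruction of the pattern, not copied from models.py):

# hedged reconstruction of the ExactModel/FlexibleModel split in pydantic v2:
# ExactModel rejects unknown report fields, FlexibleModel silently accepts them.
from pydantic import BaseModel, ConfigDict


class ExactModel(BaseModel):
    model_config = ConfigDict(extra="forbid")


class FlexibleModel(BaseModel):
    model_config = ConfigDict(extra="allow")


class Process(FlexibleModel):
    process_id: int
    process_name: str


# a report from a newer CAPE with an extra key still parses:
p = Process(process_id=4, process_name="explorer.exe", new_cape_field="ok")
print(p.process_id)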
@@ -314,25 +316,25 @@ class ProcessTree(ExactModel):
     pid: int
     parent_id: int
     module_path: str
-    threads: List[int]
-    environ: Dict[str, str]
-    children: List["ProcessTree"]
+    threads: list[int]
+    environ: dict[str, str]
+    children: list["ProcessTree"]


 class Summary(ExactModel):
-    files: List[str]
-    read_files: List[str]
-    write_files: List[str]
-    delete_files: List[str]
-    keys: List[str]
-    read_keys: List[str]
-    write_keys: List[str]
-    delete_keys: List[str]
-    executed_commands: List[str]
-    resolved_apis: List[str]
-    mutexes: List[str]
-    created_services: List[str]
-    started_services: List[str]
+    files: list[str]
+    read_files: list[str]
+    write_files: list[str]
+    delete_files: list[str]
+    keys: list[str]
+    read_keys: list[str]
+    write_keys: list[str]
+    delete_keys: list[str]
+    executed_commands: list[str]
+    resolved_apis: list[str]
+    mutexes: list[str]
+    created_services: list[str]
+    started_services: list[str]


 class EncryptedBuffer(ExactModel):
@@ -349,12 +351,12 @@ class Behavior(ExactModel):
     summary: Summary

     # list of processes, of threads, of calls
-    processes: List[Process]
+    processes: list[Process]
     # tree of processes
-    processtree: List[ProcessTree]
+    processtree: list[ProcessTree]

-    anomaly: List[str]
-    encryptedbuffers: List[EncryptedBuffer]
+    anomaly: list[str]
+    encryptedbuffers: list[EncryptedBuffer]
     # these are small objects that describe atomic events,
     # like file move, registry access.
     # we'll detect the same with our API call analysis.
@@ -373,7 +375,7 @@ class Static(ExactModel):


 class Cape(ExactModel):
-    payloads: List[ProcessFile]
+    payloads: list[ProcessFile]
     configs: Skip = None


@@ -389,7 +391,7 @@ class CapeReport(FlexibleModel):
     # static analysis results
     #
     static: Optional[Static] = None
-    strings: Optional[List[str]] = None
+    strings: Optional[list[str]] = None

     #
     # dynamic analysis results
@@ -398,10 +400,10 @@ class CapeReport(FlexibleModel):
     behavior: Behavior

     # post-processed results: payloads and extracted configs
-    CAPE: Optional[Union[Cape, List]] = None
-    dropped: Optional[List[File]] = None
-    procdump: Optional[List[ProcessFile]] = None
-    procmemory: ListTODO
+    CAPE: Optional[Union[Cape, list]] = None
+    dropped: Optional[list[File]] = None
+    procdump: Optional[list[ProcessFile]] = None
+    procmemory: Optional[ListTODO] = None

     # =========================================================================
     # information we won't use in capa
@@ -437,7 +439,7 @@ class CapeReport(FlexibleModel):
     malfamily_tag: Optional[str] = None
     malscore: float
     detections: Skip = None
-    detections2pid: Optional[Dict[int, List[str]]] = None
+    detections2pid: Optional[dict[int, list[str]]] = None
     # AV detections for the sample.
     virustotal: Skip = None
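One hunk above changes semantics as well as style: `procmemory: ListTODO` had no default, so pydantic treated the field as required, while `Optional[ListTODO] = None` lets reports omit the key entirely. A minimal illustration of that pydantic v2 behavior:

# a pydantic v2 field without a default is required, so reports lacking
# "procmemory" used to fail validation; the Optional + None default fixes that.
from typing import Optional, TypeAlias

from pydantic import BaseModel, ValidationError

ListTODO: TypeAlias = list[None]


class Before(BaseModel):
    procmemory: ListTODO


class After(BaseModel):
    procmemory: Optional[ListTODO] = None


try:
    Before()  # missing required field
except ValidationError as e:
    print("before:", e.error_count(), "error")

print("after:", After().procmemory)  # None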
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import List, Tuple, Iterator
+from typing import Iterator

 from capa.features.common import String, Feature
 from capa.features.address import Address, ThreadAddress
@@ -22,14 +22,14 @@ def get_threads(ph: ProcessHandle) -> Iterator[ThreadHandle]:
     get the threads associated with a given process
     """
     process: Process = ph.inner
-    threads: List[int] = process.threads
+    threads: list[int] = process.threads

     for thread in threads:
         address: ThreadAddress = ThreadAddress(process=ph.address, tid=thread)
         yield ThreadHandle(address=address, inner={})


-def extract_environ_strings(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_environ_strings(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
     """
     extract strings from a process' provided environment variables.
     """
@@ -39,7 +39,7 @@ def extract_environ_strings(ph: ProcessHandle) -> Iterator[Tuple[Feature, Addres
         yield String(value), ph.address


-def extract_features(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
     for handler in PROCESS_HANDLERS:
         for feature, addr in handler(ph):
             yield feature, addr
@@ -10,7 +10,7 @@ import re
 import logging
 import binascii
 import contextlib
-from typing import Tuple, Iterator
+from typing import Iterator

 import pefile

@@ -45,7 +45,7 @@ MATCH_RESULT = b'{"meta":'
 MATCH_JSON_OBJECT = b'{"'


-def extract_file_strings(buf: bytes, **kwargs) -> Iterator[Tuple[String, Address]]:
+def extract_file_strings(buf: bytes, **kwargs) -> Iterator[tuple[String, Address]]:
     """
     extract ASCII and UTF-16 LE strings from file
     """
@@ -56,7 +56,7 @@ def extract_file_strings(buf: bytes, **kwargs) -> Iterator[Tuple[String, Address
         yield String(s.s), FileOffsetAddress(s.offset)


-def extract_format(buf: bytes) -> Iterator[Tuple[Feature, Address]]:
+def extract_format(buf: bytes) -> Iterator[tuple[Feature, Address]]:
     if buf.startswith(MATCH_PE):
         yield Format(FORMAT_PE), NO_ADDRESS
     elif buf.startswith(MATCH_ELF):
@@ -79,7 +79,7 @@ def extract_format(buf: bytes) -> Iterator[Tuple[Feature, Address]]:
         return


-def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:
+def extract_arch(buf) -> Iterator[tuple[Feature, Address]]:
     if buf.startswith(MATCH_PE):
         yield from capa.features.extractors.pefile.extract_file_arch(pe=pefile.PE(data=buf))

@@ -111,7 +111,7 @@ def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]:
         return


-def extract_os(buf, os=OS_AUTO) -> Iterator[Tuple[Feature, Address]]:
+def extract_os(buf, os=OS_AUTO) -> Iterator[tuple[Feature, Address]]:
     if os != OS_AUTO:
         yield OS(os), NO_ADDRESS
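extract_format branches on magic bytes at the start of the buffer. A self-contained sketch of that sniffing approach; the constant values are the standard PE ("MZ") and ELF ("\x7fELF") file magics, and the MATCH_* names mirror, but are not copied from, this module:

# standalone sketch of buffer-prefix format sniffing using standard file magics
MATCH_PE = b"MZ"
MATCH_ELF = b"\x7fELF"


def sniff_format(buf: bytes) -> str:
    if buf.startswith(MATCH_PE):
        return "pe"
    elif buf.startswith(MATCH_ELF):
        return "elf"
    return "unknown"


assert sniff_format(b"MZ\x90\x00") == "pe"
assert sniff_format(b"\x7fELF\x02\x01") == "elf"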
@@ -8,7 +8,7 @@

 from __future__ import annotations

-from typing import Dict, List, Tuple, Union, Iterator, Optional
+from typing import Union, Iterator, Optional
 from pathlib import Path

 import dnfile
@@ -41,11 +41,11 @@ from capa.features.extractors.dnfile.helpers import (

 class DnFileFeatureExtractorCache:
     def __init__(self, pe: dnfile.dnPE):
-        self.imports: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
-        self.native_imports: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
-        self.methods: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
-        self.fields: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
-        self.types: Dict[int, Union[DnType, DnUnmanagedMethod]] = {}
+        self.imports: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
+        self.native_imports: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
+        self.methods: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
+        self.fields: dict[int, Union[DnType, DnUnmanagedMethod]] = {}
+        self.types: dict[int, Union[DnType, DnUnmanagedMethod]] = {}

         for import_ in get_dotnet_managed_imports(pe):
             self.imports[import_.token] = import_
@@ -84,7 +84,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
         self.token_cache: DnFileFeatureExtractorCache = DnFileFeatureExtractorCache(self.pe)

         # pre-compute these because we'll yield them at *every* scope.
-        self.global_features: List[Tuple[Feature, Address]] = []
+        self.global_features: list[tuple[Feature, Address]] = []
         self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_format())
         self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe))
         self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe))
@@ -100,7 +100,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):

     def get_functions(self) -> Iterator[FunctionHandle]:
         # create a method lookup table
-        methods: Dict[Address, FunctionHandle] = {}
+        methods: dict[Address, FunctionHandle] = {}
         for token, method in get_dotnet_managed_method_bodies(self.pe):
             fh: FunctionHandle = FunctionHandle(
                 address=DNTokenAddress(token),
@@ -136,7 +136,7 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):

         yield from methods.values()

-    def extract_function_features(self, fh) -> Iterator[Tuple[Feature, Address]]:
+    def extract_function_features(self, fh) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.dnfile.function.extract_features(fh)

     def get_basic_blocks(self, f) -> Iterator[BBHandle]:
@@ -157,5 +157,5 @@ class DnfileFeatureExtractor(StaticFeatureExtractor):
                 inner=insn,
             )

-    def extract_insn_features(self, fh, bbh, ih) -> Iterator[Tuple[Feature, Address]]:
+    def extract_insn_features(self, fh, bbh, ih) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.dnfile.insn.extract_features(fh, bbh, ih)
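This file already has `from __future__ import annotations` (PEP 563), so annotations are stored as strings and never evaluated at import time; lowercase generics in signatures would therefore parse even below Python 3.9, while subscripting `dict` in an actually executed expression still needs 3.9+. A small demonstration of that distinction (the lookup function and its sample token are illustrative only):

# PEP 563 defers annotation evaluation, so builtin generics in annotations
# parse everywhere, while an *evaluated* subscript requires Python 3.9+.
from __future__ import annotations

import inspect


def lookup(cache: dict[int, str], token: int) -> str:
    # the annotations above are kept as strings, never evaluated at def time
    return cache.get(token, "<unknown>")


print(inspect.get_annotations(lookup))  # values are strings under PEP 563
Alias = dict[int, str]                  # evaluated here: requires Python 3.9+
print(lookup({0x0A000001: "System.IO.File::OpenRead"}, 0x0A000001))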
@@ -8,7 +8,7 @@

 from __future__ import annotations

-from typing import Tuple, Iterator
+from typing import Iterator

 import dnfile

@@ -18,35 +18,35 @@ from capa.features.common import Class, Format, String, Feature, Namespace, Char
 from capa.features.address import Address


-def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, Address]]:
+def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[tuple[Import, Address]]:
     yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe=pe)


-def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, Address]]:
+def extract_file_format(pe: dnfile.dnPE) -> Iterator[tuple[Format, Address]]:
     yield from capa.features.extractors.dotnetfile.extract_file_format(pe=pe)


-def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[Tuple[FunctionName, Address]]:
+def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[tuple[FunctionName, Address]]:
     yield from capa.features.extractors.dotnetfile.extract_file_function_names(pe=pe)


-def extract_file_strings(pe: dnfile.dnPE) -> Iterator[Tuple[String, Address]]:
+def extract_file_strings(pe: dnfile.dnPE) -> Iterator[tuple[String, Address]]:
     yield from capa.features.extractors.dotnetfile.extract_file_strings(pe=pe)


-def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[Tuple[Characteristic, Address]]:
+def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[tuple[Characteristic, Address]]:
     yield from capa.features.extractors.dotnetfile.extract_file_mixed_mode_characteristic_features(pe=pe)


-def extract_file_namespace_features(pe: dnfile.dnPE) -> Iterator[Tuple[Namespace, Address]]:
+def extract_file_namespace_features(pe: dnfile.dnPE) -> Iterator[tuple[Namespace, Address]]:
     yield from capa.features.extractors.dotnetfile.extract_file_namespace_features(pe=pe)


-def extract_file_class_features(pe: dnfile.dnPE) -> Iterator[Tuple[Class, Address]]:
+def extract_file_class_features(pe: dnfile.dnPE) -> Iterator[tuple[Class, Address]]:
     yield from capa.features.extractors.dotnetfile.extract_file_class_features(pe=pe)


-def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]:
     for file_handler in FILE_HANDLERS:
         for feature, address in file_handler(pe):
             yield feature, address
@@ -9,7 +9,7 @@
 from __future__ import annotations

 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 from capa.features.common import Feature, Characteristic
 from capa.features.address import Address
@@ -18,30 +18,30 @@ from capa.features.extractors.base_extractor import FunctionHandle
 logger = logging.getLogger(__name__)


-def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
+def extract_function_calls_to(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
     """extract callers to a function"""
     for dest in fh.ctx["calls_to"]:
         yield Characteristic("calls to"), dest


-def extract_function_calls_from(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
+def extract_function_calls_from(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
     """extract callers from a function"""
     for src in fh.ctx["calls_from"]:
         yield Characteristic("calls from"), src


-def extract_recursive_call(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
+def extract_recursive_call(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
     """extract recursive function call"""
     if fh.address in fh.ctx["calls_to"]:
         yield Characteristic("recursive call"), fh.address


-def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]:
+def extract_function_loop(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]:
     """extract loop indicators from a function"""
     raise NotImplementedError()


-def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
     for func_handler in FUNCTION_HANDLERS:
         for feature, addr in func_handler(fh):
             yield feature, addr
@@ -9,7 +9,7 @@
 from __future__ import annotations

 import logging
-from typing import Dict, Tuple, Union, Iterator, Optional
+from typing import Union, Iterator, Optional

 import dnfile
 from dncil.cil.body import CilMethodBody
@@ -144,7 +144,7 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]:
         )


-def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]:
+def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[tuple[int, str]]:
     """get MethodDef methods used to access properties

     see https://www.ntcore.com/files/dotnetformat.htm
@@ -194,7 +194,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]:
     """
     nested_class_table = get_dotnet_nested_class_table_index(pe)

-    accessor_map: Dict[int, str] = {}
+    accessor_map: dict[int, str] = {}
     for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe):
         accessor_map[methoddef] = methoddef_access

@@ -252,7 +252,7 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]:
         yield DnType(token, typedefname, namespace=typedefnamespace, member=field.row.Name)


-def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]:
+def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[tuple[int, CilMethodBody]]:
     """get managed methods from MethodDef table"""
     for rid, method_def in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number):
         assert isinstance(method_def, dnfile.mdtable.MethodDefRow)
@@ -332,7 +332,7 @@ def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int) -> O

 def resolve_nested_typedef_name(
     nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE
-) -> Tuple[str, Tuple[str, ...]]:
+) -> tuple[str, tuple[str, ...]]:
     """Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeRef name as a tuple"""

     if index in nested_class_table:
@@ -368,7 +368,7 @@ def resolve_nested_typedef_name(

 def resolve_nested_typeref_name(
     index: int, typeref: dnfile.mdtable.TypeRefRow, pe: dnfile.dnPE
-) -> Tuple[str, Tuple[str, ...]]:
+) -> tuple[str, tuple[str, ...]]:
     """Resolves all nested TypeRef class names. Returns the namespace as a str and the nested TypeRef name as a tuple"""
     # If the ResolutionScope decodes to a typeRef type then it is nested
     if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef):
@@ -398,7 +398,7 @@ def resolve_nested_typeref_name(
     return str(typeref.TypeNamespace), (str(typeref.TypeName),)


-def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> Dict[int, int]:
+def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> dict[int, int]:
     """Build index for EnclosingClass based off the NestedClass row index in the nestedclass table"""
     nested_class_table = {}

@@ -442,7 +442,7 @@ def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool:
     return not bool(pe.net.Flags.CLR_ILONLY)


-def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[Tuple[int, dnfile.base.MDTableRow]]:
+def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[tuple[int, dnfile.base.MDTableRow]]:
     assert pe.net is not None
     assert pe.net.mdtables is not None
@@ -9,7 +9,7 @@
 from __future__ import annotations

 import logging
-from typing import TYPE_CHECKING, Tuple, Union, Iterator, Optional
+from typing import TYPE_CHECKING, Union, Iterator, Optional

 if TYPE_CHECKING:
     from capa.features.extractors.dnfile.extractor import DnFileFeatureExtractorCache
@@ -61,7 +61,7 @@ def get_callee(
     return callee


-def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """parse instruction API features"""
     if ih.inner.opcode not in (
         OpCodes.Call,
@@ -83,7 +83,7 @@ def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterato
     yield API(name), ih.address


-def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """parse instruction property features"""
     name: Optional[str] = None
     access: Optional[str] = None
@@ -118,7 +118,7 @@ def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> It

 def extract_insn_namespace_class_features(
     fh: FunctionHandle, bh, ih: InsnHandle
-) -> Iterator[Tuple[Union[Namespace, Class], Address]]:
+) -> Iterator[tuple[Union[Namespace, Class], Address]]:
     """parse instruction namespace and class features"""
     type_: Optional[Union[DnType, DnUnmanagedMethod]] = None

@@ -173,13 +173,13 @@ def extract_insn_namespace_class_features(
     yield Namespace(type_.namespace), ih.address


-def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """parse instruction number features"""
     if ih.inner.is_ldc():
         yield Number(ih.inner.get_ldc()), ih.address


-def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """parse instruction string features"""
     if not ih.inner.is_ldstr():
         return
@@ -197,7 +197,7 @@ def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iter

 def extract_unmanaged_call_characteristic_features(
     fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Characteristic, Address]]:
+) -> Iterator[tuple[Characteristic, Address]]:
     if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp):
         return

@@ -209,7 +209,7 @@ def extract_unmanaged_call_characteristic_features(
     yield Characteristic("unmanaged call"), ih.address


-def extract_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """extract instruction features"""
     for inst_handler in INSTRUCTION_HANDLERS:
         for feature, addr in inst_handler(fh, bbh, ih):
@@ -6,17 +6,17 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.

-from typing import Tuple, Optional
+from typing import Optional


 class DnType:
     def __init__(
-        self, token: int, class_: Tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None
+        self, token: int, class_: tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None
     ):
         self.token: int = token
         self.access: Optional[str] = access
         self.namespace: str = namespace
-        self.class_: Tuple[str, ...] = class_
+        self.class_: tuple[str, ...] = class_

         if member == ".ctor":
             member = "ctor"
@@ -44,7 +44,7 @@ class DnType:
         return str(self)

     @staticmethod
-    def format_name(class_: Tuple[str, ...], namespace: str = "", member: str = ""):
+    def format_name(class_: tuple[str, ...], namespace: str = "", member: str = ""):
         if len(class_) > 1:
             class_str = "/".join(class_)  # Concat items in tuple, separated by a "/"
         else:
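Note the two different tuple spellings in this file: `tuple[Feature, Address]` elsewhere is a fixed two-element pair, while `class_: tuple[str, ...]` is variadic (any number of strings), which suits arbitrarily deep nesting like Outer/Inner. A quick illustration of both forms (the function names are illustrative):

# fixed-size vs. variadic tuple annotations (PEP 585 spelling)
def pair(feature: str, address: int) -> tuple[str, int]:
    # exactly two elements, heterogeneous types
    return feature, address


def join_nested(class_: tuple[str, ...]) -> str:
    # any number of string elements, e.g. nested class names
    return "/".join(class_)


print(pair("api", 0x401000))
print(join_nested(("Outer", "Inner", "Innermost")))  # Outer/Inner/Innermost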
@@ -6,7 +6,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import Tuple, Iterator
+from typing import Iterator
 from pathlib import Path

 import dnfile
@@ -48,12 +48,12 @@ from capa.features.extractors.dnfile.helpers import (
 logger = logging.getLogger(__name__)


-def extract_file_format(**kwargs) -> Iterator[Tuple[Format, Address]]:
+def extract_file_format(**kwargs) -> Iterator[tuple[Format, Address]]:
     yield Format(FORMAT_DOTNET), NO_ADDRESS
     yield Format(FORMAT_PE), NO_ADDRESS


-def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, Address]]:
+def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Import, Address]]:
     for method in get_dotnet_managed_imports(pe):
         # like System.IO.File::OpenRead
         yield Import(str(method)), DNTokenAddress(method.token)
@@ -64,12 +64,12 @@ def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Impor
         yield Import(name), DNTokenAddress(imp.token)


-def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[FunctionName, Address]]:
+def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[FunctionName, Address]]:
     for method in get_dotnet_managed_methods(pe):
         yield FunctionName(str(method)), DNTokenAddress(method.token)


-def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Namespace, Address]]:
+def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Namespace, Address]]:
     """emit namespace features from TypeRef and TypeDef tables"""

     # namespaces may be referenced multiple times, so we need to filter
@@ -93,7 +93,7 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple
         yield Namespace(namespace), NO_ADDRESS


-def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]:
+def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Class, Address]]:
     """emit class features from TypeRef and TypeDef tables"""
     nested_class_table = get_dotnet_nested_class_table_index(pe)

@@ -116,11 +116,11 @@ def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Cla
         yield Class(DnType.format_name(typerefname, namespace=typerefnamespace)), DNTokenAddress(token)


-def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]:
+def extract_file_os(**kwargs) -> Iterator[tuple[OS, Address]]:
     yield OS(OS_ANY), NO_ADDRESS


-def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, Address]]:
+def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Arch, Address]]:
     # to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020
     # .NET 4.5 added option: any CPU, 32-bit preferred
     assert pe.net is not None
@@ -134,18 +134,18 @@ def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, Address
     yield Arch(ARCH_ANY), NO_ADDRESS


-def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[String, Address]]:
+def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[String, Address]]:
     yield from capa.features.extractors.common.extract_file_strings(pe.__data__)


 def extract_file_mixed_mode_characteristic_features(
     pe: dnfile.dnPE, **kwargs
-) -> Iterator[Tuple[Characteristic, Address]]:
+) -> Iterator[tuple[Characteristic, Address]]:
     if is_dotnet_mixed_mode(pe):
         yield Characteristic("mixed mode"), NO_ADDRESS


-def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
+def extract_file_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]:
     for file_handler in FILE_HANDLERS:
         for feature, addr in file_handler(pe=pe):  # type: ignore
             yield feature, addr
@@ -162,7 +162,7 @@ FILE_HANDLERS = (
 )


-def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]:
+def extract_global_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]:
     for handler in GLOBAL_HANDLERS:
         for feature, va in handler(pe=pe):  # type: ignore
             yield feature, va
@@ -204,7 +204,7 @@ class DotnetFileFeatureExtractor(StaticFeatureExtractor):
     def is_mixed_mode(self) -> bool:
         return is_dotnet_mixed_mode(self.pe)

-    def get_runtime_version(self) -> Tuple[int, int]:
+    def get_runtime_version(self) -> tuple[int, int]:
         assert self.pe.net is not None
         assert self.pe.net.struct is not None
         assert self.pe.net.struct.MajorRuntimeVersion is not None
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 import capa.features.extractors.helpers
 from capa.features.insn import API, Number
@@ -19,7 +19,7 @@ from capa.features.extractors.drakvuf.models import Call
 logger = logging.getLogger(__name__)


-def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
     """
     This method extracts the given call's features (such as API name and arguments),
     and returns them as API, Number, and String features.
@@ -49,7 +49,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -
     yield API(name), ch.address


-def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]:
     for handler in CALL_HANDLERS:
         for feature, addr in handler(ph, th, ch):
             yield feature, addr
@@ -7,7 +7,7 @@
|
|||||||
# See the License for the specific language governing permissions and limitations under the License.
|
# See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, List, Tuple, Union, Iterator
|
from typing import Union, Iterator
|
||||||
|
|
||||||
import capa.features.extractors.drakvuf.call
|
import capa.features.extractors.drakvuf.call
|
||||||
import capa.features.extractors.drakvuf.file
|
import capa.features.extractors.drakvuf.file
|
||||||
@@ -39,7 +39,7 @@ class DrakvufExtractor(DynamicFeatureExtractor):
|
|||||||
self.report: DrakvufReport = report
|
self.report: DrakvufReport = report
|
||||||
|
|
||||||
# sort the api calls to prevent going through the entire list each time
|
# sort the api calls to prevent going through the entire list each time
|
||||||
self.sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = index_calls(report)
|
self.sorted_calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]] = index_calls(report)
|
||||||
|
|
||||||
         # pre-compute these because we'll yield them at *every* scope.
         self.global_features = list(capa.features.extractors.drakvuf.global_.extract_features(self.report))
@@ -48,16 +48,16 @@ class DrakvufExtractor(DynamicFeatureExtractor):
         # DRAKVUF currently does not yield information about the PE's address
         return NO_ADDRESS

-    def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
         yield from self.global_features

-    def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
+    def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.drakvuf.file.extract_features(self.report)

     def get_processes(self) -> Iterator[ProcessHandle]:
         yield from capa.features.extractors.drakvuf.file.get_processes(self.sorted_calls)

-    def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.drakvuf.process.extract_features(ph)

     def get_process_name(self, ph: ProcessHandle) -> str:
@@ -66,7 +66,7 @@ class DrakvufExtractor(DynamicFeatureExtractor):
     def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
         yield from capa.features.extractors.drakvuf.process.get_threads(self.sorted_calls, ph)

-    def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
         if False:
             # force this routine to be a generator,
             # but we don't actually have any elements to generate.
@@ -87,10 +87,10 @@ class DrakvufExtractor(DynamicFeatureExtractor):

     def extract_call_features(
         self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
-    ) -> Iterator[Tuple[Feature, Address]]:
+    ) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.drakvuf.call.extract_features(ph, th, ch)

     @classmethod
-    def from_report(cls, report: Iterator[Dict]) -> "DrakvufExtractor":
+    def from_report(cls, report: Iterator[dict]) -> "DrakvufExtractor":
         dr = DrakvufReport.from_raw_report(report)
         return DrakvufExtractor(report=dr)
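Note: this diff and those that follow swap `typing.Dict`, `typing.List`, and `typing.Tuple` for the builtin generics `dict`, `list`, and `tuple`, which are subscriptable since Python 3.9 (PEP 585). A minimal illustrative sketch of the before/after annotation style (names here are hypothetical, not capa code):

# before (Python 3.8-compatible): from typing import Dict, List, Tuple
#   def index_names(names: List[str]) -> Dict[str, Tuple[int, str]]: ...
# after (Python 3.9+, PEP 585): subscript the builtins directly
def index_names(names: list[str]) -> dict[str, tuple[int, str]]:
    # map each name to (position, uppercased name)
    return {name: (i, name.upper()) for i, name in enumerate(names)}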
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Dict, List, Tuple, Iterator
+from typing import Iterator

 from capa.features.file import Import
 from capa.features.common import Feature
@@ -19,7 +19,7 @@ from capa.features.extractors.drakvuf.models import Call, DrakvufReport
 logger = logging.getLogger(__name__)


-def get_processes(calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]) -> Iterator[ProcessHandle]:
+def get_processes(calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]]) -> Iterator[ProcessHandle]:
     """
     Get all the created processes for a sample.
     """
@@ -28,7 +28,7 @@ def get_processes(calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]])
         yield ProcessHandle(proc_addr, inner={"process_name": sample_call.process_name})


-def extract_import_names(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_import_names(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
     """
     Extract imported function names.
     """
@@ -43,7 +43,7 @@ def extract_import_names(report: DrakvufReport) -> Iterator[Tuple[Feature, Addre
         yield Import(name), AbsoluteVirtualAddress(function_address)


-def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
     for handler in FILE_HANDLERS:
         for feature, addr in handler(report):
             yield feature, addr
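The `extract_features`/`FILE_HANDLERS` shape above is a simple dispatch pattern: a tuple of generator functions, each yielding (feature, address) pairs, flattened by one top-level generator. A self-contained sketch of the pattern, with hypothetical handler names rather than capa's:

from typing import Callable, Iterator

def handler_a(report: dict) -> Iterator[tuple[str, int]]:
    yield "feature-a", 0x401000

def handler_b(report: dict) -> Iterator[tuple[str, int]]:
    yield "feature-b", 0x402000

HANDLERS: tuple[Callable[[dict], Iterator[tuple[str, int]]], ...] = (handler_a, handler_b)

def extract_features(report: dict) -> Iterator[tuple[str, int]]:
    # flatten the per-handler streams into one stream of (feature, address) pairs
    for handler in HANDLERS:
        yield from handler(report)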
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Tuple, Iterator
+from typing import Iterator

 from capa.features.common import OS, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, Feature
 from capa.features.address import NO_ADDRESS, Address
@@ -16,22 +16,22 @@ from capa.features.extractors.drakvuf.models import DrakvufReport
 logger = logging.getLogger(__name__)


-def extract_format(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_format(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
     # DRAKVUF sandbox currently supports only Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
     yield Format(FORMAT_PE), NO_ADDRESS


-def extract_os(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_os(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
     # DRAKVUF sandbox currently supports only PE files: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
     yield OS(OS_WINDOWS), NO_ADDRESS


-def extract_arch(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_arch(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
     # DRAKVUF sandbox currently supports only x64 Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html
     yield Arch(ARCH_AMD64), NO_ADDRESS


-def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]:
     for global_handler in GLOBAL_HANDLER:
         for feature, addr in global_handler(report):
             yield feature, addr
@@ -7,16 +7,15 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import itertools
-from typing import Dict, List

 from capa.features.address import ThreadAddress, ProcessAddress
 from capa.features.extractors.drakvuf.models import Call, DrakvufReport


-def index_calls(report: DrakvufReport) -> Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]:
+def index_calls(report: DrakvufReport) -> dict[ProcessAddress, dict[ThreadAddress, list[Call]]]:
     # this method organizes calls into processes and threads, and then sorts them based on
     # timestamp so that we can address individual calls per index (CallAddress requires call index)
-    result: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = {}
+    result: dict[ProcessAddress, dict[ThreadAddress, list[Call]]] = {}
     for call in itertools.chain(report.syscalls, report.apicalls):
         if call.pid == 0:
             # DRAKVUF captures api/native calls from all processes running on the system.
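`index_calls` groups a flat event stream into a process -> thread -> calls mapping so each call can later be addressed by its list index. A rough standalone sketch of the same grouping, assuming each call record carries `pid`, `tid`, and `timestamp` fields (field names are assumptions for illustration):

from collections import defaultdict

def index_calls(calls: list[dict]) -> dict[int, dict[int, list[dict]]]:
    result: dict[int, dict[int, list[dict]]] = defaultdict(lambda: defaultdict(list))
    for call in calls:
        result[call["pid"]][call["tid"]].append(call)
    # sort each thread's calls by timestamp so list position is a stable call index
    for threads in result.values():
        for tid_calls in threads.values():
            tid_calls.sort(key=lambda c: c["timestamp"])
    return {pid: dict(threads) for pid, threads in result.items()}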
@@ -6,7 +6,7 @@
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
-from typing import Any, Dict, List, Iterator
+from typing import Any, Iterator

 from pydantic import Field, BaseModel, ConfigDict, model_validator

@@ -47,7 +47,7 @@ class LoadedDLL(ConciseModel):
     plugin_name: str = Field(alias="Plugin")
     event: str = Field(alias="Event")
     name: str = Field(alias="DllName")
-    imports: Dict[str, int] = Field(alias="Rva")
+    imports: dict[str, int] = Field(alias="Rva")


 class Call(ConciseModel):
@@ -58,18 +58,18 @@ class Call(ConciseModel):
     pid: int = Field(alias="PID")
     tid: int = Field(alias="TID")
     name: str = Field(alias="Method")
-    arguments: Dict[str, str]
+    arguments: dict[str, str]


 class WinApiCall(Call):
     # This class models Windows API calls captured by DRAKVUF (DLLs, etc.).
-    arguments: Dict[str, str] = Field(alias="Arguments")
+    arguments: dict[str, str] = Field(alias="Arguments")
     event: str = Field(alias="Event")
     return_value: str = Field(alias="ReturnValue")

     @model_validator(mode="before")
     @classmethod
-    def build_arguments(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+    def build_arguments(cls, values: dict[str, Any]) -> dict[str, Any]:
         args = values["Arguments"]
         values["Arguments"] = dict(arg.split("=", 1) for arg in args)
         return values
@@ -100,7 +100,7 @@ class SystemCall(Call):

     @model_validator(mode="before")
     @classmethod
-    def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+    def build_extra(cls, values: dict[str, Any]) -> dict[str, Any]:
         # DRAKVUF stores argument names and values as entries in the syscall's entry.
         # This model validator collects those arguments into a list in the model.
         values["arguments"] = {
@@ -110,13 +110,13 @@ class SystemCall(Call):


 class DrakvufReport(ConciseModel):
-    syscalls: List[SystemCall] = []
-    apicalls: List[WinApiCall] = []
-    discovered_dlls: List[DiscoveredDLL] = []
-    loaded_dlls: List[LoadedDLL] = []
+    syscalls: list[SystemCall] = []
+    apicalls: list[WinApiCall] = []
+    discovered_dlls: list[DiscoveredDLL] = []
+    loaded_dlls: list[LoadedDLL] = []

     @classmethod
-    def from_raw_report(cls, entries: Iterator[Dict]) -> "DrakvufReport":
+    def from_raw_report(cls, entries: Iterator[dict]) -> "DrakvufReport":
         report = cls()

         for entry in entries:
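`WinApiCall.build_arguments` above shows pydantic's "before"-mode model validator: DRAKVUF emits arguments as "Name=Value" strings, and the validator reshapes them into a dict before field validation runs. A self-contained sketch of the same idea (sample data invented for illustration):

from typing import Any

from pydantic import BaseModel, Field, model_validator

class WinApiCall(BaseModel):
    arguments: dict[str, str] = Field(alias="Arguments")

    @model_validator(mode="before")
    @classmethod
    def build_arguments(cls, values: dict[str, Any]) -> dict[str, Any]:
        # reshape ["lpFileName=C:\\x", "dwFlags=0"] into {"lpFileName": "C:\\x", ...}
        values["Arguments"] = dict(arg.split("=", 1) for arg in values["Arguments"])
        return values

call = WinApiCall.model_validate({"Arguments": ["lpFileName=C:\\tmp", "dwFlags=0"]})
assert call.arguments["dwFlags"] == "0"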
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Dict, List, Tuple, Iterator
+from typing import Iterator

 from capa.features.common import String, Feature
 from capa.features.address import Address, ThreadAddress, ProcessAddress
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)


 def get_threads(
-    calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]], ph: ProcessHandle
+    calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]], ph: ProcessHandle
 ) -> Iterator[ThreadHandle]:
     """
     Get the threads associated with a given process.
@@ -27,11 +27,11 @@ def get_threads(
         yield ThreadHandle(address=thread_addr, inner={})


-def extract_process_name(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_process_name(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
     yield String(ph.inner["process_name"]), ph.address


-def extract_features(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
     for handler in PROCESS_HANDLERS:
         for feature, addr in handler(ph):
             yield feature, addr
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.

 import logging
-from typing import Dict, List, Iterator
+from typing import Iterator

 from capa.features.address import ThreadAddress, ProcessAddress, DynamicCallAddress
 from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle
@@ -17,7 +17,7 @@ logger = logging.getLogger(__name__)


 def get_calls(
-    sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]], ph: ProcessHandle, th: ThreadHandle
+    sorted_calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]], ph: ProcessHandle, th: ThreadHandle
 ) -> Iterator[CallHandle]:
     for i, call in enumerate(sorted_calls[ph.address][th.address]):
         call_addr = DynamicCallAddress(thread=th.address, id=i)
@@ -10,7 +10,7 @@ import logging
 import itertools
 import collections
 from enum import Enum
-from typing import TYPE_CHECKING, Set, Dict, List, Tuple, BinaryIO, Iterator, Optional
+from typing import TYPE_CHECKING, BinaryIO, Iterator, Optional
 from dataclasses import dataclass

 if TYPE_CHECKING:
@@ -394,7 +394,7 @@ class ELF:
         return read_cstr(phdr.buf, 0)

     @property
-    def versions_needed(self) -> Dict[str, Set[str]]:
+    def versions_needed(self) -> dict[str, set[str]]:
         # symbol version requirements are stored in the .gnu.version_r section,
         # which has type SHT_GNU_verneed (0x6ffffffe).
         #
@@ -452,7 +452,7 @@ class ELF:
         return {}

     @property
-    def dynamic_entries(self) -> Iterator[Tuple[int, int]]:
+    def dynamic_entries(self) -> Iterator[tuple[int, int]]:
         """
         read the entries from the dynamic section,
         yielding the tag and value for each entry.
@@ -547,7 +547,7 @@ class ELF:
                 logger.warning("failed to read DT_NEEDED entry: %s", str(e))

     @property
-    def symtab(self) -> Optional[Tuple[Shdr, Shdr]]:
+    def symtab(self) -> Optional[tuple[Shdr, Shdr]]:
         """
         fetch the Shdr for the symtab and the associated strtab.
         """
@@ -682,7 +682,7 @@ class SymTab:
         symtab: Shdr,
         strtab: Shdr,
     ) -> None:
-        self.symbols: List[Symbol] = []
+        self.symbols: list[Symbol] = []

         self.symtab = symtab
         self.strtab = strtab
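Note that in the `ELF.symtab` hunk, `Optional` stays imported from `typing`: PEP 585 (Python 3.9) only made the builtin containers subscriptable, while the `X | None` union syntax needs Python 3.10 (PEP 604). A small illustration with hypothetical names:

from typing import Optional

# fine on Python 3.9: builtin generics via PEP 585
def versions_needed(raw: dict[str, set[str]]) -> dict[str, set[str]]:
    return raw

# still needs typing.Optional on 3.9; "tuple[int, int] | None" requires 3.10+
def symtab(found: bool) -> Optional[tuple[int, int]]:
    return (1, 2) if found else None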
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.
 import io
 import logging
-from typing import Tuple, Iterator
+from typing import Iterator
 from pathlib import Path

 from elftools.elf.elffile import ELFFile, DynamicSegment, SymbolTableSection
@@ -166,7 +166,7 @@ def extract_file_arch(elf: ELFFile, **kwargs):
         logger.warning("unsupported architecture: %s", arch)


-def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, int]]:
+def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[tuple[Feature, int]]:
     for file_handler in FILE_HANDLERS:
         for feature, addr in file_handler(elf=elf, buf=buf):  # type: ignore
             yield feature, addr
@@ -182,7 +182,7 @@ FILE_HANDLERS = (
 )


-def extract_global_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, int]]:
+def extract_global_features(elf: ELFFile, buf: bytes) -> Iterator[tuple[Feature, int]]:
     for global_handler in GLOBAL_HANDLERS:
         for feature, addr in global_handler(elf=elf, buf=buf):  # type: ignore
             yield feature, addr
@@ -8,7 +8,7 @@

 import string
 import struct
-from typing import Tuple, Iterator
+from typing import Iterator

 import ghidra
 from ghidra.program.model.lang import OperandType
@@ -97,7 +97,7 @@ def _bb_has_tight_loop(bb: ghidra.program.model.block.CodeBlock):
     return False


-def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
     """extract stackstring indicators from basic block"""
     bb: ghidra.program.model.block.CodeBlock = bbh.inner

@@ -105,7 +105,7 @@ def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[
         yield Characteristic("stack string"), bbh.address


-def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
     """check basic block for tight loop indicators"""
     bb: ghidra.program.model.block.CodeBlock = bbh.inner

@@ -119,7 +119,7 @@ BASIC_BLOCK_HANDLERS = (
 )


-def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
     """
     extract features from the given basic block.

@@ -127,7 +127,7 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Featur
       bb: the basic block to process.

     yields:
-      Tuple[Feature, int]: the features and their location found in this basic block.
+      tuple[Feature, int]: the features and their location found in this basic block.
     """
     yield BasicBlock(), bbh.address
     for bb_handler in BASIC_BLOCK_HANDLERS:
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import List, Tuple, Iterator
+from typing import Iterator

 import capa.features.extractors.ghidra.file
 import capa.features.extractors.ghidra.insn
@@ -40,7 +40,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
             )
         )

-        self.global_features: List[Tuple[Feature, Address]] = []
+        self.global_features: list[tuple[Feature, Address]] = []
         self.global_features.extend(capa.features.extractors.ghidra.file.extract_file_format())
         self.global_features.extend(capa.features.extractors.ghidra.global_.extract_os())
         self.global_features.extend(capa.features.extractors.ghidra.global_.extract_arch())
@@ -73,7 +73,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):
         func = getFunctionContaining(toAddr(addr))  # type: ignore [name-defined] # noqa: F821
         return FunctionHandle(address=AbsoluteVirtualAddress(func.getEntryPoint().getOffset()), inner=func)

-    def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.ghidra.function.extract_features(fh)

     def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
@@ -81,7 +81,7 @@ class GhidraFeatureExtractor(StaticFeatureExtractor):

         yield from ghidra_helpers.get_function_blocks(fh)

-    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]:
+    def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
         yield from capa.features.extractors.ghidra.basicblock.extract_features(fh, bbh)

     def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.
 import re
 import struct
-from typing import List, Tuple, Iterator
+from typing import Iterator

 from ghidra.program.model.symbol import SourceType, SymbolType

@@ -22,7 +22,7 @@ from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, Absolu
 MAX_OFFSET_PE_AFTER_MZ = 0x200


-def find_embedded_pe(block_bytez: bytes, mz_xor: List[Tuple[bytes, bytes, int]]) -> Iterator[Tuple[int, int]]:
+def find_embedded_pe(block_bytez: bytes, mz_xor: list[tuple[bytes, bytes, int]]) -> Iterator[tuple[int, int]]:
     """check segment for embedded PE

     adapted for Ghidra from:
@@ -60,11 +60,11 @@ def find_embedded_pe(block_bytez: bytes, mz_xor: List[Tuple[bytes, bytes, int]])
             yield off, i


-def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]:
     """extract embedded PE features"""

     # pre-compute XOR pairs
-    mz_xor: List[Tuple[bytes, bytes, int]] = [
+    mz_xor: list[tuple[bytes, bytes, int]] = [
         (
             capa.features.extractors.helpers.xor_static(b"MZ", i),
             capa.features.extractors.helpers.xor_static(b"PE", i),
@@ -84,14 +84,14 @@ def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:
         yield Characteristic("embedded pe"), FileOffsetAddress(ea)


-def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_export_names() -> Iterator[tuple[Feature, Address]]:
     """extract function exports"""
     st = currentProgram().getSymbolTable()  # type: ignore [name-defined] # noqa: F821
     for addr in st.getExternalEntryPointIterator():
         yield Export(st.getPrimarySymbol(addr).getName()), AbsoluteVirtualAddress(addr.getOffset())


-def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_import_names() -> Iterator[tuple[Feature, Address]]:
     """extract function imports

     1. imports by ordinal:
@@ -116,14 +116,14 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
         yield Import(name), AbsoluteVirtualAddress(addr)


-def extract_file_section_names() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_section_names() -> Iterator[tuple[Feature, Address]]:
     """extract section names"""

     for block in currentProgram().getMemory().getBlocks():  # type: ignore [name-defined] # noqa: F821
         yield Section(block.getName()), AbsoluteVirtualAddress(block.getStart().getOffset())


-def extract_file_strings() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_strings() -> Iterator[tuple[Feature, Address]]:
     """extract ASCII and UTF-16 LE strings"""

     for block in currentProgram().getMemory().getBlocks():  # type: ignore [name-defined] # noqa: F821
@@ -141,7 +141,7 @@ def extract_file_strings() -> Iterator[Tuple[Feature, Address]]:
             yield String(s.s), FileOffsetAddress(offset)


-def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_function_names() -> Iterator[tuple[Feature, Address]]:
     """
     extract the names of statically-linked library functions.
     """
@@ -162,7 +162,7 @@ def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]:
         yield FunctionName(name[1:]), addr


-def extract_file_format() -> Iterator[Tuple[Feature, Address]]:
+def extract_file_format() -> Iterator[tuple[Feature, Address]]:
     ef = currentProgram().getExecutableFormat()  # type: ignore [name-defined] # noqa: F821
     if "PE" in ef:
         yield Format(FORMAT_PE), NO_ADDRESS
@@ -175,7 +175,7 @@ def extract_file_format() -> Iterator[Tuple[Feature, Address]]:
     raise NotImplementedError(f"unexpected file format: {ef}")


-def extract_features() -> Iterator[Tuple[Feature, Address]]:
+def extract_features() -> Iterator[tuple[Feature, Address]]:
     """extract file features"""
     for file_handler in FILE_HANDLERS:
         for feature, addr in file_handler():
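`extract_file_embedded_pe` pre-computes the `MZ`/`PE` magic bytes XOR-encoded under every single-byte key, so the scan can spot XOR-obfuscated embedded PEs in one pass. A rough standalone sketch of that pre-computation plus a naive scan; `xor_static` here is a simple stand-in for capa's helper of the same name:

from typing import Iterator

def xor_static(data: bytes, key: int) -> bytes:
    # single-byte XOR, a stand-in for capa.features.extractors.helpers.xor_static
    return bytes(b ^ key for b in data)

# pre-compute (encoded "MZ", encoded "PE", key) for all 256 single-byte keys
mz_xor: list[tuple[bytes, bytes, int]] = [
    (xor_static(b"MZ", i), xor_static(b"PE", i), i) for i in range(256)
]

def find_embedded_pe_candidates(buf: bytes) -> Iterator[tuple[int, int]]:
    # yield (offset, key) wherever an encoded MZ header appears past offset 0
    for mz, _pe, key in mz_xor:
        off = buf.find(mz, 1)
        while off != -1:
            yield off, key
            off = buf.find(mz, off + 1)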
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Tuple, Iterator
+from typing import Iterator

 import ghidra
 from ghidra.program.model.block import BasicBlockModel, SimpleBlockIterator
@@ -49,7 +49,7 @@ def extract_recursive_call(fh: FunctionHandle):
         yield Characteristic("recursive call"), AbsoluteVirtualAddress(f.getEntryPoint().getOffset())


-def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
     for func_handler in FUNCTION_HANDLERS:
         for feature, addr in func_handler(fh):
             yield feature, addr
@@ -7,7 +7,7 @@
 # See the License for the specific language governing permissions and limitations under the License.
 import logging
 import contextlib
-from typing import Tuple, Iterator
+from typing import Iterator

 import capa.ghidra.helpers
 import capa.features.extractors.elf
@@ -18,7 +18,7 @@ from capa.features.address import NO_ADDRESS, Address
 logger = logging.getLogger(__name__)


-def extract_os() -> Iterator[Tuple[Feature, Address]]:
+def extract_os() -> Iterator[tuple[Feature, Address]]:
     format_name: str = currentProgram().getExecutableFormat()  # type: ignore [name-defined] # noqa: F821

     if "PE" in format_name:
@@ -45,7 +45,7 @@ def extract_os() -> Iterator[Tuple[Feature, Address]]:
     return


-def extract_arch() -> Iterator[Tuple[Feature, Address]]:
+def extract_arch() -> Iterator[tuple[Feature, Address]]:
     lang_id = currentProgram().getMetadata().get("Language ID")  # type: ignore [name-defined] # noqa: F821

     if "x86" in lang_id and "64" in lang_id:
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Dict, List, Iterator
+from typing import Iterator

 import ghidra
 import java.lang
@@ -20,7 +20,7 @@ from capa.features.address import AbsoluteVirtualAddress
 from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle


-def ints_to_bytes(bytez: List[int]) -> bytes:
+def ints_to_bytes(bytez: list[int]) -> bytes:
     """convert Java signed ints to Python bytes

     args:
@@ -83,10 +83,10 @@ def get_insn_in_range(bbh: BBHandle) -> Iterator[InsnHandle]:
         yield InsnHandle(address=AbsoluteVirtualAddress(insn.getAddress().getOffset()), inner=insn)


-def get_file_imports() -> Dict[int, List[str]]:
+def get_file_imports() -> dict[int, list[str]]:
     """get all import names & addrs"""

-    import_dict: Dict[int, List[str]] = {}
+    import_dict: dict[int, list[str]] = {}

     for f in currentProgram().getFunctionManager().getExternalFunctions():  # type: ignore [name-defined] # noqa: F821
         for r in f.getSymbol().getReferences():
@@ -110,7 +110,7 @@ def get_file_imports() -> Dict[int, List[str]]:
     return import_dict


-def get_file_externs() -> Dict[int, List[str]]:
+def get_file_externs() -> dict[int, list[str]]:
     """
     Gets function names & addresses of statically-linked library functions

@@ -124,7 +124,7 @@ def get_file_externs() -> Dict[int, List[str]]:
       - Note: See Symbol Table labels
     """

-    extern_dict: Dict[int, List[str]] = {}
+    extern_dict: dict[int, list[str]] = {}

     for sym in currentProgram().getSymbolTable().getAllSymbols(True):  # type: ignore [name-defined] # noqa: F821
         # .isExternal() misses more than this config for the function symbols
@@ -143,7 +143,7 @@ def get_file_externs() -> Dict[int, List[str]]:
     return extern_dict


-def map_fake_import_addrs() -> Dict[int, List[int]]:
+def map_fake_import_addrs() -> dict[int, list[int]]:
     """
     Map ghidra's fake import entrypoints to their
     real addresses
@@ -162,7 +162,7 @@ def map_fake_import_addrs() -> Dict[int, List[int]]:
       - 0x473090 -> PTR_CreateServiceW_00473090
      - 'EXTERNAL:00000025' -> External Address (ghidra.program.model.address.SpecialAddress)
     """
-    fake_dict: Dict[int, List[int]] = {}
+    fake_dict: dict[int, list[int]] = {}

     for f in currentProgram().getFunctionManager().getExternalFunctions():  # type: ignore [name-defined] # noqa: F821
         for r in f.getSymbol().getReferences():
@@ -174,9 +174,9 @@ def map_fake_import_addrs() -> Dict[int, List[int]]:

 def check_addr_for_api(
     addr: ghidra.program.model.address.Address,
-    fakes: Dict[int, List[int]],
-    imports: Dict[int, List[str]],
-    externs: Dict[int, List[str]],
+    fakes: dict[int, list[int]],
+    imports: dict[int, list[str]],
+    externs: dict[int, list[str]],
 ) -> bool:
     offset = addr.getOffset()
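`ints_to_bytes` exists because Ghidra's Java API hands back signed bytes (-128..127), which Python's `bytes()` constructor rejects. A minimal sketch of one way to do that conversion, assuming the input is a list of Java-style signed ints (this is an illustration, not necessarily capa's exact body):

import struct

def ints_to_bytes(bytez: list[int]) -> bytes:
    # pack each signed Java byte (-128..127) as one raw byte
    return struct.pack(f"{len(bytez)}b", *bytez)

assert ints_to_bytes([77, 90, -1]) == b"MZ\xff"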
@@ -5,7 +5,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License
 # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
-from typing import Any, Dict, Tuple, Iterator
+from typing import Any, Iterator

 import ghidra
 from ghidra.program.model.lang import OperandType
@@ -26,21 +26,21 @@ SECURITY_COOKIE_BYTES_DELTA = 0x40
 OPERAND_TYPE_DYNAMIC_ADDRESS = OperandType.DYNAMIC | OperandType.ADDRESS


-def get_imports(ctx: Dict[str, Any]) -> Dict[int, Any]:
+def get_imports(ctx: dict[str, Any]) -> dict[int, Any]:
     """Populate the import cache for this context"""
     if "imports_cache" not in ctx:
         ctx["imports_cache"] = capa.features.extractors.ghidra.helpers.get_file_imports()
     return ctx["imports_cache"]


-def get_externs(ctx: Dict[str, Any]) -> Dict[int, Any]:
+def get_externs(ctx: dict[str, Any]) -> dict[int, Any]:
     """Populate the externs cache for this context"""
     if "externs_cache" not in ctx:
         ctx["externs_cache"] = capa.features.extractors.ghidra.helpers.get_file_externs()
     return ctx["externs_cache"]


-def get_fakes(ctx: Dict[str, Any]) -> Dict[int, Any]:
+def get_fakes(ctx: dict[str, Any]) -> dict[int, Any]:
     """Populate the fake import addrs cache for this context"""
     if "fakes_cache" not in ctx:
         ctx["fakes_cache"] = capa.features.extractors.ghidra.helpers.map_fake_import_addrs()
@@ -48,7 +48,7 @@ def get_fakes(ctx: Dict[str, Any]) -> Dict[int, Any]:


 def check_for_api_call(
-    insn, externs: Dict[int, Any], fakes: Dict[int, Any], imports: Dict[int, Any], imp_or_ex: bool
+    insn, externs: dict[int, Any], fakes: dict[int, Any], imports: dict[int, Any], imp_or_ex: bool
 ) -> Iterator[Any]:
     """check instruction for API call

@@ -110,7 +110,7 @@ def check_for_api_call(
         yield info


-def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     insn: ghidra.program.database.code.InstructionDB = ih.inner

     if not capa.features.extractors.ghidra.helpers.is_call_or_jmp(insn):
@@ -131,7 +131,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle)
         yield API(ext), ih.address


-def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """
     parse instruction number features
     example:
@@ -186,7 +186,7 @@ def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
             yield OperandOffset(i, const), addr


-def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """
     parse instruction structure offset features

@@ -219,7 +219,7 @@ def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
             yield OperandOffset(i, op_off), ih.address


-def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """
     parse referenced byte sequences

@@ -234,7 +234,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
         yield Bytes(extracted_bytes), ih.address


-def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
+def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
     """
     parse instruction string features

@@ -249,7 +249,7 @@ def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl

 def extract_insn_mnemonic_features(
     fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """parse instruction mnemonic features"""
     insn: ghidra.program.database.code.InstructionDB = ih.inner

@@ -258,7 +258,7 @@ def extract_insn_mnemonic_features(

 def extract_insn_obfs_call_plus_5_characteristic_features(
     fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """
     parse call $+5 instruction from the given instruction.
     """
@@ -279,7 +279,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(

 def extract_insn_segment_access_features(
     fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """parse instruction fs or gs access"""
     insn: ghidra.program.database.code.InstructionDB = ih.inner

@@ -294,7 +294,7 @@ def extract_insn_segment_access_features(

 def extract_insn_peb_access_characteristic_features(
     fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """parse instruction peb access

     fs:[0x30] on x86, gs:[0x60] on x64
@@ -310,7 +310,7 @@ def extract_insn_peb_access_characteristic_features(

 def extract_insn_cross_section_cflow(
     fh: FunctionHandle, bb: BBHandle, ih: InsnHandle
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """inspect the instruction for a CALL or JMP that crosses section boundaries"""
     insn: ghidra.program.database.code.InstructionDB = ih.inner

@@ -364,7 +364,7 @@ def extract_function_calls_from(
     fh: FunctionHandle,
     bb: BBHandle,
     ih: InsnHandle,
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """extract functions calls from features

     most relevant at the function scope, however, its most efficient to extract at the instruction scope
@@ -393,7 +393,7 @@ def extract_function_indirect_call_characteristic_features(
     fh: FunctionHandle,
     bb: BBHandle,
     ih: InsnHandle,
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     """extract indirect function calls (e.g., call eax or call dword ptr [edx+4])
     does not include calls like => call ds:dword_ABD4974

@@ -442,7 +442,7 @@ def extract_insn_nzxor_characteristic_features(
     fh: FunctionHandle,
     bb: BBHandle,
     ih: InsnHandle,
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     f: ghidra.program.database.function.FunctionDB = fh.inner
     insn: ghidra.program.database.code.InstructionDB = ih.inner

@@ -461,7 +461,7 @@ def extract_features(
     fh: FunctionHandle,
     bb: BBHandle,
     insn: InsnHandle,
-) -> Iterator[Tuple[Feature, Address]]:
+) -> Iterator[tuple[Feature, Address]]:
     for insn_handler in INSTRUCTION_HANDLERS:
         for feature, addr in insn_handler(fh, bb, insn):
             yield feature, addr
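`get_imports`, `get_externs`, and `get_fakes` above share a lazy-caching pattern: an expensive whole-program lookup runs once and is stashed in a shared `ctx` dict that travels with the function handle, so every later instruction reuses the result. A stripped-down sketch of the pattern (the scan body is invented for illustration):

from typing import Any

def expensive_scan() -> dict[int, Any]:
    # stand-in for a whole-program pass such as get_file_imports()
    return {0x401000: "CreateFileW"}

def get_imports(ctx: dict[str, Any]) -> dict[int, Any]:
    # compute once per context, then reuse on every instruction
    if "imports_cache" not in ctx:
        ctx["imports_cache"] = expensive_scan()
    return ctx["imports_cache"]

ctx: dict[str, Any] = {}
assert get_imports(ctx) is get_imports(ctx)  # the second call hits the cache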
@@ -8,7 +8,7 @@

 import struct
 import builtins
-from typing import Tuple, Iterator
+from typing import Iterator

 MIN_STACKSTRING_LEN = 8

@@ -119,7 +119,7 @@ def twos_complement(val: int, bits: int) -> int:
     return val


-def carve_pe(pbytes: bytes, offset: int = 0) -> Iterator[Tuple[int, int]]:
+def carve_pe(pbytes: bytes, offset: int = 0) -> Iterator[tuple[int, int]]:
     """
     Generate (offset, key) tuples of embedded PEs

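The final hunk touches `carve_pe`, which yields (offset, key) tuples for embedded PE candidates; the nearby `twos_complement` converts raw unsigned operand values into signed integers. Its body is elided from this diff, so here is a conventional two's-complement implementation as a reference sketch:

def twos_complement(val: int, bits: int) -> int:
    # interpret the low `bits` bits of val as a signed integer
    if val & (1 << (bits - 1)):
        return val - (1 << bits)
    return val

assert twos_complement(0xFF, 8) == -1
assert twos_complement(0x7F, 8) == 127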
Some files were not shown because too many files have changed in this diff.