mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 15:49:46 -08:00
Merge pull request #1662 from Aayush-Goel-04/Aayush-Goel-04/Issue#1607
ELF: Implement file import and export name extractor
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
## master (unreleased)
|
||||
|
||||
### New Features
|
||||
- ELF: implement file import and export name extractor #1607 @Aayush-Goel-04
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
|
||||
@@ -11,9 +11,10 @@ from typing import Tuple, Iterator
|
||||
from pathlib import Path
|
||||
|
||||
from elftools.elf.elffile import ELFFile, SymbolTableSection
|
||||
from elftools.elf.relocation import RelocationSection
|
||||
|
||||
import capa.features.extractors.common
|
||||
from capa.features.file import Import, Section
|
||||
from capa.features.file import Export, Import, Section
|
||||
from capa.features.common import OS, FORMAT_ELF, Arch, Format, Feature
|
||||
from capa.features.address import NO_ADDRESS, FileOffsetAddress, AbsoluteVirtualAddress
|
||||
from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
@@ -21,11 +22,8 @@ from capa.features.extractors.base_extractor import FeatureExtractor
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_file_import_names(elf, **kwargs):
|
||||
# see https://github.com/eliben/pyelftools/blob/0664de05ed2db3d39041e2d51d19622a8ef4fb0f/scripts/readelf.py#L372
|
||||
symbol_tables = [(idx, s) for idx, s in enumerate(elf.iter_sections()) if isinstance(s, SymbolTableSection)]
|
||||
|
||||
for _, section in symbol_tables:
|
||||
def extract_file_export_names(elf: ELFFile, **kwargs):
|
||||
for section in elf.iter_sections():
|
||||
if not isinstance(section, SymbolTableSection):
|
||||
continue
|
||||
|
||||
@@ -35,14 +33,64 @@ def extract_file_import_names(elf, **kwargs):
|
||||
|
||||
logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_symbols())
|
||||
|
||||
for symbol in section.iter_symbols():
|
||||
# The conditions below are based on this article about what counts as an ELF export:
|
||||
# http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
|
||||
if not symbol.name:
|
||||
continue
|
||||
if symbol.entry.st_info.type not in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]:
|
||||
continue
|
||||
if symbol.entry.st_value == 0:
|
||||
continue
|
||||
if symbol.entry.st_shndx == "SHN_UNDEF":
|
||||
continue
|
||||
|
||||
yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value)
|
||||
|
||||
|
||||
def extract_file_import_names(elf: ELFFile, **kwargs) -> Iterator[Tuple[Import, FileOffsetAddress]]:
    """
    Yield an Import feature for each relocation that refers to an import-like symbol.

    Works in two passes:
      1. collect the names of import-like symbols from every symbol table section,
      2. walk every relocation section and emit one feature per relocation whose
         symbol index resolves to one of the collected names.

    Args:
      elf: the parsed ELF file to inspect.
      **kwargs: unused by this function.

    Yields:
      (Import(symbol name), FileOffsetAddress(relocation r_offset)) pairs.
    """
    # NOTE(review): r_offset is wrapped in FileOffsetAddress, as before. For executables and
    # shared objects the ELF spec defines r_offset as a virtual address - confirm the address type.
    import_like_types = ("STT_FUNC", "STT_OBJECT", "STT_IFUNC")

    # Names of import-like symbols, keyed by (symbol table section index, symbol index).
    # The section index is part of the key because symbol indices restart in every symbol
    # table (e.g. .dynsym and .symtab), so a symbol index alone is ambiguous.
    symbol_names = {}

    for section_index, section in enumerate(elf.iter_sections()):
        if not isinstance(section, SymbolTableSection):
            continue

        for symbol_index, symbol in enumerate(section.iter_symbols()):
            # The conditions below are based on this article:
            # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
            if not symbol.name:
                continue
            if symbol.entry.st_info.type not in import_like_types:
                continue
            # keep only symbols with no value and no defining section, i.e. not defined in this file
            if symbol.entry.st_value != 0:
                continue
            if symbol.entry.st_shndx != "SHN_UNDEF":
                continue
            if symbol.entry.st_name == 0:
                continue

            symbol_names[(section_index, symbol_index)] = symbol.name

    for section in elf.iter_sections():
        if not isinstance(section, RelocationSection):
            continue

        if section["sh_entsize"] == 0:
            logger.debug("Relocation section '%s' has a sh_entsize of zero!", section.name)
            continue

        logger.debug("Relocation section '%s' contains %s entries:", section.name, section.num_relocations())

        # sh_link of a relocation section holds the section header index of the
        # symbol table that its r_info_sym values index into.
        symbol_table_index = section["sh_link"]

        for relocation in section.iter_relocations():
            # Resolve the symbol name through the linked symbol table only.
            name = symbol_names.get((symbol_table_index, relocation["r_info_sym"]))
            if name is None:
                continue
            yield Import(name), FileOffsetAddress(relocation["r_offset"])
|
||||
|
||||
|
||||
def extract_file_section_names(elf, **kwargs):
|
||||
def extract_file_section_names(elf: ELFFile, **kwargs):
|
||||
for section in elf.iter_sections():
|
||||
if section.name:
|
||||
yield Section(section.name), AbsoluteVirtualAddress(section.header.sh_addr)
|
||||
@@ -54,7 +102,7 @@ def extract_file_strings(buf, **kwargs):
|
||||
yield from capa.features.extractors.common.extract_file_strings(buf)
|
||||
|
||||
|
||||
def extract_file_os(elf, buf, **kwargs):
|
||||
def extract_file_os(elf: ELFFile, buf, **kwargs):
|
||||
# our current approach does not always get an OS value, e.g. for packed samples
|
||||
# for file limitation purposes, we're more lax here
|
||||
try:
|
||||
@@ -68,7 +116,7 @@ def extract_file_format(**kwargs):
|
||||
yield Format(FORMAT_ELF), NO_ADDRESS
|
||||
|
||||
|
||||
def extract_file_arch(elf, **kwargs):
|
||||
def extract_file_arch(elf: ELFFile, **kwargs):
|
||||
arch = elf.get_machine_arch()
|
||||
if arch == "x86":
|
||||
yield Arch("i386"), NO_ADDRESS
|
||||
@@ -85,8 +133,7 @@ def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, i
|
||||
|
||||
|
||||
FILE_HANDLERS = (
|
||||
# TODO(williballenthin): implement extract_file_export_names
|
||||
# https://github.com/mandiant/capa/issues/1607
|
||||
extract_file_export_names,
|
||||
extract_file_import_names,
|
||||
extract_file_section_names,
|
||||
extract_file_strings,
|
||||
|
||||
71
tests/test_elffile_features.py
Normal file
71
tests/test_elffile_features.py
Normal file
@@ -0,0 +1,71 @@
|
||||
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and limitations under the License.
|
||||
import io
|
||||
from pathlib import Path
|
||||
|
||||
from elftools.elf.elffile import ELFFile
|
||||
|
||||
from capa.features.extractors.elffile import extract_file_export_names, extract_file_import_names
|
||||
|
||||
CD = Path(__file__).resolve().parent
|
||||
SAMPLE_PATH = CD / "data" / "055da8e6ccfe5a9380231ea04b850e18.elf_"
|
||||
|
||||
|
||||
def test_elffile_import_features():
    """Check that the ELF import extractor reports every import name expected from the sample file."""
    expected_imports = (
        "memfrob",
        "puts",
        "__libc_start_main",
        "malloc",
        "__cxa_finalize",
    )

    # parse the sample from an in-memory copy of its bytes
    sample_bytes = Path(SAMPLE_PATH).read_bytes()
    parsed = ELFFile(io.BytesIO(sample_bytes))

    # materialize the generator so the results can be both checked for emptiness and searched
    found = list(extract_file_import_names(parsed))

    # an empty result means nothing was extracted at all
    assert found, "No imports were found."

    # the first element of each result is the feature; its value is the import name
    found_names = [item[0].value for item in found]

    # every expected import name must be among the extracted ones
    for symbol_name in expected_imports:
        assert symbol_name in found_names, f"Symbol '{symbol_name}' not found in imports."
|
||||
|
||||
|
||||
def test_elffile_export_features():
    """Check that the ELF export extractor reports every export name expected from the sample file."""
    expected_exports = (
        "deregister_tm_clones",
        "register_tm_clones",
        "__do_global_dtors_aux",
        "completed.8060",
        "__do_global_dtors_aux_fini_array_entry",
        "frame_dummy",
        "_init",
        "__libc_csu_fini",
        "_fini",
        "__dso_handle",
        "_IO_stdin_used",
        "__libc_csu_init",
    )

    # parse the sample from an in-memory copy of its bytes
    sample_bytes = Path(SAMPLE_PATH).read_bytes()
    parsed = ELFFile(io.BytesIO(sample_bytes))

    # materialize the generator so the results can be both checked for emptiness and searched
    found = list(extract_file_export_names(parsed))

    # an empty result means nothing was extracted at all
    assert found, "No exports were found."

    # the first element of each result is the feature; its value is the export name
    found_names = [item[0].value for item in found]

    # every expected export name must be among the extracted ones
    for symbol_name in expected_exports:
        assert symbol_name in found_names, f"Symbol '{symbol_name}' not found in exports."
|
||||
Reference in New Issue
Block a user