mirror of
https://github.com/mandiant/capa.git
synced 2025-12-12 15:49:46 -08:00
332
scripts/detect-elf-os.py
Normal file
332
scripts/detect-elf-os.py
Normal file
@@ -0,0 +1,332 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Copyright (C) 2021 FireEye, Inc. All Rights Reserved.
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at: [package root]/LICENSE.txt
|
||||||
|
Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
|
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
|
detect-elf-os
|
||||||
|
|
||||||
|
Attempt to detect the underlying OS that the given ELF file targets.
|
||||||
|
"""
|
||||||
|
import sys
|
||||||
|
import struct
|
||||||
|
import logging
|
||||||
|
import argparse
|
||||||
|
import contextlib
|
||||||
|
from enum import Enum
|
||||||
|
from typing import BinaryIO
|
||||||
|
|
||||||
|
logger = logging.getLogger("capa.detect-elf-os")
|
||||||
|
|
||||||
|
|
||||||
|
def align(v, alignment):
    """
    Round `v` up to the next multiple of `alignment`.

    Args:
      v: non-negative integer to align.
      alignment: positive integer boundary.

    Returns:
      `v` itself when it is already a multiple of `alignment`,
      otherwise the smallest multiple of `alignment` greater than `v`.
    """
    remainder = v % alignment
    if remainder == 0:
        return v

    # pad by the distance to the *next* boundary, not by the overshoot past the previous one.
    return v + (alignment - remainder)
|
||||||
|
|
||||||
|
|
||||||
|
class IDAIO:
    """
    An object that acts as a file-like object,
    using bytes from the current IDB workspace.

    Only the subset of the file protocol used by this script is provided:
    absolute `seek`, `read`, and `close`.
    """

    def __init__(self):
        assert IDAIO.is_runtime_ida()

        super(IDAIO, self).__init__()

        # the IDA modules only exist inside IDA, so they are imported lazily here.
        # keep references on the instance: names imported inside this method
        # are locals and would not be visible from `read`.
        import idc
        import ida_bytes
        import ida_loader

        self._idc = idc
        self._ida_bytes = ida_bytes
        self._ida_loader = ida_loader

        # current position, as an offset into the input file
        self.offset = 0

    def seek(self, offset, whence=0):
        """Set the current file offset; only absolute seeks (whence=0) are supported."""
        assert whence == 0
        self.offset = offset

    def read(self, size):
        """
        Read `size` bytes from the database at the address that
        corresponds to the current file offset.

        The current offset is not advanced by reading.
        """
        ea = self._ida_loader.get_fileregion_ea(self.offset)
        if ea == self._idc.BADADDR:
            # best guess, such as if file is mapped at address 0x0.
            ea = self.offset

        logger.debug("reading 0x%x bytes at 0x%x (ea: 0x%x)", size, self.offset, ea)

        # NOTE(review): get_bytes appears to return None when the bytes cannot be read;
        # always hand back bytes so callers can safely take len() of the result - confirm against IDAPython docs.
        return self._ida_bytes.get_bytes(ea, size) or b""

    def close(self):
        """Nothing to release; present so the object can be used with contextlib.closing."""
        return

    @staticmethod
    def is_runtime_ida():
        """Return True when the `idc` module can be imported, i.e. we are running inside IDA."""
        try:
            import idc
        except ImportError:
            return False
        else:
            return True
|
||||||
|
|
||||||
|
|
||||||
|
class CorruptElfFile(ValueError):
    """Raised when the input does not parse as a well-formed ELF file."""
|
||||||
|
|
||||||
|
|
||||||
|
class OS(str, Enum):
    """
    Names of the operating systems/ABIs used by the ELF OS detection.

    Members are also `str` instances. Each value equals the member name,
    except `_86OPEN`: an identifier cannot start with a digit,
    so the name carries a leading underscore that the value does not.
    """

    HPUX = "HPUX"
    NETBSD = "NETBSD"
    LINUX = "LINUX"
    HURD = "HURD"
    # leading underscore only in the name; see the class docstring.
    _86OPEN = "86OPEN"
    SOLARIS = "SOLARIS"
    AIX = "AIX"
    IRIX = "IRIX"
    FREEBSD = "FREEBSD"
    TRU64 = "TRU64"
    MODESTO = "MODESTO"
    OPENBSD = "OPENBSD"
    OPENVMS = "OPENVMS"
    NSK = "NSK"
    AROS = "AROS"
    FENIXOS = "FENIXOS"
    CLOUD = "CLOUD"
    SORTFIX = "SORTFIX"
    ARM_AEABI = "ARM_AEABI"
    SYLLABLE = "SYLLABLE"
    NACL = "NACL"
|
||||||
|
|
||||||
|
|
||||||
|
def detect_elf_os(f: BinaryIO) -> str:
    """
    Guess the operating system that the given ELF file targets.

    Three heuristics are evaluated, in order of preference:
      1. the EI_OSABI byte of the ELF identification,
      2. the first note of each PT_NOTE segment (owner name and, for GNU, the ABI tag),
      3. the interpreter path stored in a PT_INTERP segment.

    The first heuristic that matches decides the result.
    Later heuristics are still evaluated so that their findings show up in the debug log.

    Args:
      f: file-like object supporting `seek(offset)` and `read(size)`.

    Returns:
      the value of the detected `OS` member, or "unknown" when no heuristic matches.

    Raises:
      CorruptElfFile: when the magic is missing, the ELF header is truncated,
        or ei_class/ei_data hold invalid values.
    """
    PT_INTERP = 0x3
    PT_NOTE = 0x4

    OSABI = {
        # via pyelftools: https://github.com/eliben/pyelftools/blob/0664de05ed2db3d39041e2d51d19622a8ef4fb0f/elftools/elf/enums.py#L35-L58
        # 0: "SYSV",
        1: OS.HPUX,
        2: OS.NETBSD,
        3: OS.LINUX,
        4: OS.HURD,
        5: OS._86OPEN,
        6: OS.SOLARIS,
        7: OS.AIX,
        8: OS.IRIX,
        9: OS.FREEBSD,
        10: OS.TRU64,
        11: OS.MODESTO,
        12: OS.OPENBSD,
        13: OS.OPENVMS,
        14: OS.NSK,
        15: OS.AROS,
        16: OS.FENIXOS,
        17: OS.CLOUD,
        # 53: "SORTFIX",
        # 64: "ARM_AEABI",
        # 97: "ARM",
        # 255: "STANDALONE",
    }

    # via readelf: https://github.com/bminor/binutils-gdb/blob/c0e94211e1ac05049a4ce7c192c9d14d1764eb3e/binutils/readelf.c#L19635-L19658
    # and here: https://github.com/bminor/binutils-gdb/blob/34c54daa337da9fadf87d2706d6a590ae1f88f4d/include/elf/common.h#L933-L939
    GNU_ABI_TAG = {
        0: OS.LINUX,
        1: OS.HURD,
        2: OS.SOLARIS,
        3: OS.FREEBSD,
        4: OS.NETBSD,
        5: OS.SYLLABLE,
        6: OS.NACL,
    }

    f.seek(0x0)
    file_header = f.read(0x40)

    # we'll set this to the detected OS
    # prefer the first heuristics,
    # but rather than short circuiting,
    # we'll still parse out the remainder, for debugging.
    ret = None

    if not file_header.startswith(b"\x7fELF"):
        raise CorruptElfFile("missing magic header")

    try:
        ei_class, ei_data = struct.unpack_from("BB", file_header, 4)
    except struct.error as e:
        raise CorruptElfFile("truncated ELF header") from e

    logger.debug("ei_class: 0x%02x ei_data: 0x%02x", ei_class, ei_data)
    if ei_class == 1:
        bitness = 32
    elif ei_class == 2:
        bitness = 64
    else:
        raise CorruptElfFile("invalid ei_class: 0x%02x" % ei_class)

    if ei_data == 1:
        endian = "<"
    elif ei_data == 2:
        endian = ">"
    else:
        raise CorruptElfFile("not an ELF file: invalid ei_data: 0x%02x" % ei_data)

    # field widths and offsets differ between ELF32 and ELF64.
    # bitness can only be 32 or 64 at this point, so no further fallback is needed.
    if bitness == 32:
        phoff_fmt, phoff_offset, phinfo_offset = "I", 0x1C, 0x2A
        # p_offset, p_vaddr, p_paddr, p_filesz
        segment_fmt, segment_offset = "IIII", 0x4
    else:
        phoff_fmt, phoff_offset, phinfo_offset = "Q", 0x20, 0x36
        # p_offset, p_vaddr, p_paddr, p_filesz (p_flags sits between p_type and p_offset)
        segment_fmt, segment_offset = "QQQQ", 0x8

    try:
        (e_phoff,) = struct.unpack_from(endian + phoff_fmt, file_header, phoff_offset)
        e_phentsize, e_phnum = struct.unpack_from(endian + "HH", file_header, phinfo_offset)
        (ei_osabi,) = struct.unpack_from(endian + "B", file_header, 7)
    except struct.error as e:
        raise CorruptElfFile("truncated ELF header") from e

    logger.debug("e_phoff: 0x%02x e_phentsize: 0x%02x e_phnum: %d", e_phoff, e_phentsize, e_phnum)
    logger.debug("ei_osabi: 0x%02x (%s)", ei_osabi, OSABI.get(ei_osabi, "unknown"))

    if ei_osabi in OSABI and ei_osabi != 0x0:
        # update only if not set
        # so we can get the debugging output of subsequent strategies
        ret = OSABI[ei_osabi] if not ret else ret

    f.seek(e_phoff)
    program_header_size = e_phnum * e_phentsize
    program_headers = f.read(program_header_size)
    if len(program_headers) != program_header_size:
        logger.warning("failed to read program headers")
        e_phnum = 0

    # each entry must be large enough to hold the fields parsed below,
    # otherwise unpacking would fail part-way through the table.
    min_phentsize = segment_offset + struct.calcsize(endian + segment_fmt)
    if e_phnum and e_phentsize < min_phentsize:
        logger.warning("program header entries are too small: 0x%x", e_phentsize)
        e_phnum = 0

    # parse the program header table once; both heuristics below walk the result.
    segments = []
    for i in range(e_phnum):
        offset = i * e_phentsize
        phent = program_headers[offset : offset + e_phentsize]

        (p_type,) = struct.unpack_from(endian + "I", phent, 0x0)
        logger.debug("p_type: 0x%04x", p_type)

        p_offset, _, _, p_filesz = struct.unpack_from(endian + segment_fmt, phent, segment_offset)
        segments.append((p_type, p_offset, p_filesz))

    for p_type, p_offset, p_filesz in segments:
        if p_type != PT_NOTE:
            continue

        logger.debug("p_offset: 0x%02x p_filesz: 0x%04x", p_offset, p_filesz)

        f.seek(p_offset)
        note = f.read(p_filesz)
        if len(note) != p_filesz:
            logger.warning("failed to read note content")
            continue

        name_offset = 0xC
        if len(note) < name_offset:
            # not even room for the namesz/descsz/type header
            logger.warning("note is too small: 0x%x bytes", len(note))
            continue

        namesz, descsz, type_ = struct.unpack_from(endian + "III", note, 0x0)
        desc_offset = name_offset + align(namesz, 0x4)

        logger.debug("namesz: 0x%02x descsz: 0x%02x type: 0x%04x", namesz, descsz, type_)

        # the owner name comes from the (untrusted) file,
        # so don't let stray non-ASCII bytes abort the whole detection.
        name = note[name_offset : name_offset + namesz].partition(b"\x00")[0].decode("ascii", errors="replace")
        logger.debug("name: %s", name)

        if type_ != 1:
            continue

        if name == "GNU":
            if descsz < 16:
                continue

            desc = note[desc_offset : desc_offset + descsz]
            if len(desc) < 16:
                # descsz claims more data than the segment actually holds
                logger.warning("note descriptor is truncated")
                continue

            abi_tag, kmajor, kminor, kpatch = struct.unpack_from(endian + "IIII", desc, 0x0)
            logger.debug("GNU_ABI_TAG: 0x%02x", abi_tag)

            if abi_tag in GNU_ABI_TAG:
                abi_os = GNU_ABI_TAG[abi_tag]
                # update only if not set
                # so we can get the debugging output of subsequent strategies
                ret = abi_os if not ret else ret
                # log what the tag itself says, which may differ from the OS already chosen above.
                logger.debug("abi tag: %s earliest compatible kernel: %d.%d.%d", abi_os, kmajor, kminor, kpatch)
        elif name == "OpenBSD":
            logger.debug("note owner: %s", "OPENBSD")
            ret = OS.OPENBSD if not ret else ret
        elif name == "NetBSD":
            logger.debug("note owner: %s", "NETBSD")
            ret = OS.NETBSD if not ret else ret

    for p_type, p_offset, p_filesz in segments:
        if p_type != PT_INTERP:
            continue

        f.seek(p_offset)
        interp = f.read(p_filesz)
        if len(interp) != p_filesz:
            logger.warning("failed to read interp content")
            continue

        linker = interp.partition(b"\x00")[0].decode("ascii", errors="replace")
        logger.debug("linker: %s", linker)
        if "ld-linux" in linker:
            # update only if not set
            # so we can get the debugging output of subsequent strategies
            ret = OS.LINUX if ret is None else ret

    return ret.value if ret is not None else "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv=None):
    """
    Print the OS targeted by an ELF file to STDOUT.

    When running inside IDA, the bytes of the current database are inspected via `IDAIO`
    and `argv` is ignored. Otherwise the path of the sample is taken from the command line.

    Args:
      argv: command line arguments without the program name; defaults to `sys.argv[1:]`.

    Returns:
      0 on success, -1 when the input is reported as a corrupt ELF file.
    """
    if IDAIO.is_runtime_ida():
        f: BinaryIO = IDAIO()

    else:
        # nothing but the result may be written to STDOUT, so no status output here.
        if argv is None:
            argv = sys.argv[1:]

        parser = argparse.ArgumentParser(description="Detect the underlying OS for the given ELF file")
        parser.add_argument("sample", type=str, help="path to ELF file")

        logging_group = parser.add_argument_group("logging arguments")

        logging_group.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
        logging_group.add_argument(
            "-q", "--quiet", action="store_true", help="disable all status output except fatal errors"
        )

        args = parser.parse_args(args=argv)

        # --quiet wins over --debug when both are given
        if args.quiet:
            level = logging.WARNING
        elif args.debug:
            level = logging.DEBUG
        else:
            level = logging.INFO

        logging.basicConfig(level=level)
        # basicConfig is a no-op when the root logger already has handlers, so set the level explicitly too.
        logging.getLogger().setLevel(level)

        f = open(args.sample, "rb")

    with contextlib.closing(f):
        try:
            print(detect_elf_os(f))
            return 0
        except CorruptElfFile as e:
            logger.error("corrupt ELF file: %s", str(e.args[0]))
            return -1
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    if not IDAIO.is_runtime_ida():
        # standalone: propagate main's return value as the process exit status.
        sys.exit(main())

    # inside IDA: run without calling sys.exit
    # (presumably so the hosting process is not asked to exit - confirm).
    main()
|
||||||
Reference in New Issue
Block a user