scripts: add script to detect ELF OS

closes #724
This commit is contained in:
William Ballenthin
2021-08-11 13:52:50 -06:00
parent 05f8e2445a
commit baaa8ba2c1

332
scripts/detect-elf-os.py Normal file
View File

@@ -0,0 +1,332 @@
#!/usr/bin/env python3
"""
Copyright (C) 2021 FireEye, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at: [package root]/LICENSE.txt
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
detect-elf-os
Attempt to detect the underlying OS that the given ELF file targets.
"""
import sys
import struct
import logging
import argparse
import contextlib
from enum import Enum
from typing import BinaryIO
# Module-level logger used by every function in this script.
# The name is a fixed string rather than being derived from __name__.
logger = logging.getLogger("capa.detect-elf-os")
def align(v, alignment):
    """
    Round `v` up to the nearest multiple of `alignment`.

    Args:
      v: non-negative integer value (e.g. an offset or size).
      alignment: positive integer boundary to align to.

    Returns:
      `v` unchanged when it is already a multiple of `alignment`,
      otherwise the next larger multiple of `alignment`.
    """
    remainder = v % alignment
    if remainder == 0:
        return v
    # pad by the distance to the *next* boundary,
    # not by the distance already travelled past the previous one.
    return v + (alignment - remainder)
class IDAIO:
    """
    An object that acts as a file-like object,
    using bytes from the current IDB workspace.

    Only the small part of the file protocol used by this script is provided:
    absolute `seek()`, `read()`, and a no-op `close()`.
    """

    def __init__(self):
        assert IDAIO.is_runtime_ida()
        super().__init__()

        # the IDA modules are imported here, not at module scope, so that the
        # script can still be loaded outside of IDA.
        # names imported inside __init__ are local to it, so keep references on
        # the instance; otherwise read() could not resolve them.
        import idc
        import ida_bytes
        import ida_loader

        self._idc = idc
        self._ida_bytes = ida_bytes
        self._ida_loader = ida_loader

        # current position, expressed as an offset into the input file.
        self.offset = 0

    def seek(self, offset, whence=0):
        """Set the current file offset; only absolute seeks (`whence == 0`) are supported."""
        assert whence == 0
        self.offset = offset

    def read(self, size):
        """
        Fetch `size` bytes from the database, starting at the current file offset.

        The current offset is not advanced by a read.
        """
        ea = self._ida_loader.get_fileregion_ea(self.offset)
        if ea == self._idc.BADADDR:
            # best guess, such as if file is mapped at address 0x0.
            ea = self.offset

        logger.debug("reading 0x%x bytes at 0x%x (ea: 0x%x)", size, self.offset, ea)
        return self._ida_bytes.get_bytes(ea, size)

    def close(self):
        """Nothing to release; present so the object can be used with `contextlib.closing`."""
        return

    @staticmethod
    def is_runtime_ida():
        """Return True when the `idc` module can be imported, False otherwise."""
        try:
            import idc
        except ImportError:
            return False
        else:
            return True
class CorruptElfFile(ValueError):
    """Raised when the input cannot be parsed as a well-formed ELF file."""
class OS(str, Enum):
    """
    Operating systems that the detection heuristics can report.

    Members are also `str` instances, and each value is the upper-case OS name.
    """

    # names found in the ELF OSABI table
    HPUX = "HPUX"
    NETBSD = "NETBSD"
    LINUX = "LINUX"
    HURD = "HURD"
    # identifiers cannot start with a digit, hence the leading underscore
    _86OPEN = "86OPEN"
    SOLARIS = "SOLARIS"
    AIX = "AIX"
    IRIX = "IRIX"
    FREEBSD = "FREEBSD"
    TRU64 = "TRU64"
    MODESTO = "MODESTO"
    OPENBSD = "OPENBSD"
    OPENVMS = "OPENVMS"
    NSK = "NSK"
    AROS = "AROS"
    FENIXOS = "FENIXOS"
    CLOUD = "CLOUD"
    SORTFIX = "SORTFIX"
    ARM_AEABI = "ARM_AEABI"

    # names found only in the GNU ABI tag table
    SYLLABLE = "SYLLABLE"
    NACL = "NACL"
def detect_elf_os(f: BinaryIO) -> str:
    """
    Guess the operating system that the given ELF file targets.

    Three heuristics are evaluated, in decreasing order of preference:

      1. the OSABI byte of the ELF identification header,
      2. the first note record of each PT_NOTE segment
         (GNU ABI tag, or an "OpenBSD"/"NetBSD" note owner),
      3. the PT_INTERP interpreter path (when it contains "ld-linux").

    The first heuristic to produce an answer wins, but the remaining ones are
    still evaluated so that their findings show up in the debug log.

    Args:
      f: binary file-like object supporting `seek(offset)` and `read(size)`.

    Returns:
      the value of the matching `OS` member, or "unknown" if no heuristic matched.

    Raises:
      CorruptElfFile: the magic is missing, the ELF header is truncated,
        or the class/data-encoding bytes are invalid.
    """
    f.seek(0x0)
    file_header = f.read(0x40)

    # we'll set this to the detected OS
    # prefer the first heuristics,
    # but rather than short circuiting,
    # we'll still parse out the remainder, for debugging.
    ret = None

    if not file_header.startswith(b"\x7fELF"):
        raise CorruptElfFile("missing magic header")

    try:
        ei_class, ei_data = struct.unpack_from("BB", file_header, 4)
    except struct.error as e:
        raise CorruptElfFile("truncated ELF header") from e
    logger.debug("ei_class: 0x%02x ei_data: 0x%02x", ei_class, ei_data)

    if ei_class == 1:
        bitness = 32
    elif ei_class == 2:
        bitness = 64
    else:
        raise CorruptElfFile("invalid ei_class: 0x%02x" % ei_class)

    if ei_data == 1:
        endian = "<"
    elif ei_data == 2:
        endian = ">"
    else:
        raise CorruptElfFile("not an ELF file: invalid ei_data: 0x%02x" % ei_data)

    # the layout of the ELF header and of program header entries depends on bitness.
    # `phent_fmt` unpacks (p_offset, p_vaddr, p_paddr, p_filesz),
    # starting at `phent_fields_offset` within a program header entry.
    try:
        if bitness == 32:
            (e_phoff,) = struct.unpack_from(endian + "I", file_header, 0x1C)
            e_phentsize, e_phnum = struct.unpack_from(endian + "HH", file_header, 0x2A)
            phent_fmt = endian + "IIII"
            phent_fields_offset = 0x4
        elif bitness == 64:
            (e_phoff,) = struct.unpack_from(endian + "Q", file_header, 0x20)
            e_phentsize, e_phnum = struct.unpack_from(endian + "HH", file_header, 0x36)
            phent_fmt = endian + "QQQQ"
            phent_fields_offset = 0x8
        else:
            raise NotImplementedError("unsupported bitness: %d" % bitness)
    except struct.error as e:
        raise CorruptElfFile("truncated ELF header") from e

    logger.debug("e_phoff: 0x%02x e_phentsize: 0x%02x e_phnum: %d", e_phoff, e_phentsize, e_phnum)

    (ei_osabi,) = struct.unpack_from(endian + "B", file_header, 7)
    OSABI = {
        # via pyelftools: https://github.com/eliben/pyelftools/blob/0664de05ed2db3d39041e2d51d19622a8ef4fb0f/elftools/elf/enums.py#L35-L58
        # 0: "SYSV",
        1: OS.HPUX,
        2: OS.NETBSD,
        3: OS.LINUX,
        4: OS.HURD,
        5: OS._86OPEN,
        6: OS.SOLARIS,
        7: OS.AIX,
        8: OS.IRIX,
        9: OS.FREEBSD,
        10: OS.TRU64,
        11: OS.MODESTO,
        12: OS.OPENBSD,
        13: OS.OPENVMS,
        14: OS.NSK,
        15: OS.AROS,
        16: OS.FENIXOS,
        17: OS.CLOUD,
        # 53: "SORTFIX",
        # 64: "ARM_AEABI",
        # 97: "ARM",
        # 255: "STANDALONE",
    }
    logger.debug("ei_osabi: 0x%02x (%s)", ei_osabi, OSABI.get(ei_osabi, "unknown"))

    if ei_osabi in OSABI and ei_osabi != 0x0:
        # update only if not set
        # so we can get the debugging output of subsequent strategies
        ret = OSABI[ei_osabi] if not ret else ret

    f.seek(e_phoff)
    program_header_size = e_phnum * e_phentsize
    program_headers = f.read(program_header_size)
    if len(program_headers) != program_header_size:
        logger.warning("failed to read program headers")
        e_phnum = 0
    elif e_phnum and e_phentsize < phent_fields_offset + struct.calcsize(phent_fmt):
        # entries this small cannot hold the fields parsed below;
        # skip them rather than failing while unpacking.
        logger.warning("program header entries are too small: 0x%x bytes", e_phentsize)
        e_phnum = 0

    PT_INTERP = 0x3
    PT_NOTE = 0x4

    # via readelf: https://github.com/bminor/binutils-gdb/blob/c0e94211e1ac05049a4ce7c192c9d14d1764eb3e/binutils/readelf.c#L19635-L19658
    # and here: https://github.com/bminor/binutils-gdb/blob/34c54daa337da9fadf87d2706d6a590ae1f88f4d/include/elf/common.h#L933-L939
    GNU_ABI_TAG = {
        0: OS.LINUX,
        1: OS.HURD,
        2: OS.SOLARIS,
        3: OS.FREEBSD,
        4: OS.NETBSD,
        5: OS.SYLLABLE,
        6: OS.NACL,
    }

    # heuristic 2: notes.
    # only the first note record of each PT_NOTE segment is examined.
    for i in range(e_phnum):
        offset = i * e_phentsize
        phent = program_headers[offset : offset + e_phentsize]

        (p_type,) = struct.unpack_from(endian + "I", phent, 0x0)
        logger.debug("p_type: 0x%04x", p_type)
        if p_type != PT_NOTE:
            continue

        p_offset, _, _, p_filesz = struct.unpack_from(phent_fmt, phent, phent_fields_offset)
        logger.debug("p_offset: 0x%02x p_filesz: 0x%04x", p_offset, p_filesz)

        f.seek(p_offset)
        note = f.read(p_filesz)
        if len(note) != p_filesz:
            logger.warning("failed to read note content")
            continue

        name_offset = 0xC
        if len(note) < name_offset:
            # not even room for the (namesz, descsz, type) note header.
            logger.warning("note is too small: 0x%x bytes", len(note))
            continue

        namesz, descsz, type_ = struct.unpack_from(endian + "III", note, 0x0)
        desc_offset = name_offset + align(namesz, 0x4)

        logger.debug("namesz: 0x%02x descsz: 0x%02x type: 0x%04x", namesz, descsz, type_)

        # the name is only compared against ASCII constants,
        # so tolerate (rather than crash on) unexpected bytes.
        name = note[name_offset : name_offset + namesz].partition(b"\x00")[0].decode("ascii", errors="replace")
        logger.debug("name: %s", name)

        if type_ != 1:
            continue

        if name == "GNU":
            if descsz < 16:
                continue

            desc = note[desc_offset : desc_offset + descsz]
            if len(desc) < 16:
                # descsz claims more data than the segment actually holds.
                logger.warning("failed to read note descriptor")
                continue

            abi_tag, kmajor, kminor, kpatch = struct.unpack_from(endian + "IIII", desc, 0x0)
            logger.debug("GNU_ABI_TAG: 0x%02x", abi_tag)

            if abi_tag in GNU_ABI_TAG:
                abi_os = GNU_ABI_TAG[abi_tag]
                # update only if not set
                # so we can get the debugging output of subsequent strategies
                ret = abi_os if not ret else ret
                # log what this note says, which may differ from an earlier answer.
                logger.debug(
                    "abi tag: %s earliest compatible kernel: %d.%d.%d", abi_os, kmajor, kminor, kpatch
                )
        elif name == "OpenBSD":
            logger.debug("note owner: %s", "OPENBSD")
            ret = OS.OPENBSD if not ret else ret
        elif name == "NetBSD":
            logger.debug("note owner: %s", "NETBSD")
            ret = OS.NETBSD if not ret else ret

    # heuristic 3: the requested program interpreter (dynamic linker).
    for i in range(e_phnum):
        offset = i * e_phentsize
        phent = program_headers[offset : offset + e_phentsize]

        (p_type,) = struct.unpack_from(endian + "I", phent, 0x0)
        if p_type != PT_INTERP:
            continue

        p_offset, _, _, p_filesz = struct.unpack_from(phent_fmt, phent, phent_fields_offset)

        f.seek(p_offset)
        interp = f.read(p_filesz)
        if len(interp) != p_filesz:
            logger.warning("failed to read interp content")
            continue

        linker = interp.partition(b"\x00")[0].decode("ascii", errors="replace")
        logger.debug("linker: %s", linker)
        if "ld-linux" in linker:
            # update only if not set
            # so we can get the debugging output of subsequent strategies
            ret = OS.LINUX if ret is None else ret

    return ret.value if ret is not None else "unknown"
def main(argv=None):
    """
    Script entry point: detect the OS of an ELF file and print it to STDOUT.

    When the `idc` module is importable (see `IDAIO.is_runtime_ida`), the bytes
    are read through `IDAIO` and no command line is parsed.
    Otherwise the sample path and logging options are taken from `argv`.

    Args:
      argv: list of command line arguments, without the program name.
        Defaults to `sys.argv[1:]`.

    Returns:
      0 on success, -1 if the input is not a well-formed ELF file.
    """
    if IDAIO.is_runtime_ida():
        f: BinaryIO = IDAIO()
    else:
        if argv is None:
            argv = sys.argv[1:]

        parser = argparse.ArgumentParser(description="Detect the underlying OS for the given ELF file")
        parser.add_argument("sample", type=str, help="path to ELF file")

        logging_group = parser.add_argument_group("logging arguments")
        logging_group.add_argument("-d", "--debug", action="store_true", help="enable debugging output on STDERR")
        logging_group.add_argument(
            "-q", "--quiet", action="store_true", help="disable all status output except fatal errors"
        )
        args = parser.parse_args(args=argv)

        # --quiet takes precedence over --debug.
        if args.quiet:
            level = logging.WARNING
        elif args.debug:
            level = logging.DEBUG
        else:
            level = logging.INFO
        # basicConfig() does nothing when the root logger already has handlers,
        # so also set the level explicitly.
        logging.basicConfig(level=level)
        logging.getLogger().setLevel(level)

        # STDOUT carries only the detected OS, so this note goes to the debug log.
        logger.debug("not running in IDA, reading sample from disk: %s", args.sample)
        f = open(args.sample, "rb")

    with contextlib.closing(f):
        try:
            print(detect_elf_os(f))
            return 0
        except CorruptElfFile as e:
            logger.error("corrupt ELF file: %s", str(e.args[0]))
            return -1
if __name__ == "__main__":
    # outside of IDA, hand main()'s return value to sys.exit() as the exit status.
    if not IDAIO.is_runtime_ida():
        sys.exit(main())

    # inside IDA, just run main() and discard its result; sys.exit() is not called.
    main()