From d5df1c16bb36831747078fa6a30847075744d69f Mon Sep 17 00:00:00 2001 From: d-millar <33498836+d-millar@users.noreply.github.com> Date: Wed, 8 Jan 2025 13:16:34 -0500 Subject: [PATCH] GP-326: never say die GP-326: recompiling to htmnl GP-326: recompiling to htmnl GP-326: last? GP-326: getting there GP-326: roll along GP-326: rolling along GP-326: test fix GP-326: miscellaneous post-review fixes GP-326: complicated stuff GP-326: more simple stuff GP-326: navhead fix GP-326: better docs GP-326: html for md GP-326: html for md GP-326: tutorial edits GP-326: tutorial edits GP-326: re-arranging docs GP-326: from review GP-326: adding a debugger GP-326: docs GP-326: using TestResources - tests pass GP-326: working tests GP-326: most cmd/meth tests working GP-326: cmd tests pass GP-326: passes thru putmem GP-326: one test running GP-326: better startup logic GP-326: first pass tests GP-326: misc cleanup GP-326: cleaner startup GP-326: cleanup GP-326: fixes for crash dump GP-326: util cleanup GP-326: objects cont. 
GP-326: first pass at objects GP-326: some cleanup GP-326: regions GP-326: sections GP-326: modules GP-326: alt launchers GP-326: symbols GP-326: memory GP-326: stack frame - regs + locals GP-326: frames GP-326: threads GP-326: better start sequence GP-326: working launcher GP-326: util.version GP-326: arch --- .../Debug/Debugger-agent-drgn/Module.manifest | 0 Ghidra/Debug/Debugger-agent-drgn/README.md | 1 + Ghidra/Debug/Debugger-agent-drgn/build.gradle | 20 + .../certification.manifest | 11 + .../data/debugger-launchers/core-drgn.sh | 32 + .../data/debugger-launchers/kernel-drgn.sh | 31 + .../data/debugger-launchers/local-drgn.sh | 34 + .../data/support/local-drgn.py | 57 + .../Debugger-agent-drgn/src/main/py/LICENSE | 11 + .../src/main/py/MANIFEST.in | 1 + .../Debugger-agent-drgn/src/main/py/README.md | 3 + .../src/main/py/pyproject.toml | 25 + .../src/main/py/src/ghidradrgn/__init__.py | 16 + .../src/main/py/src/ghidradrgn/arch.py | 209 +++ .../src/main/py/src/ghidradrgn/commands.py | 1411 +++++++++++++++++ .../src/main/py/src/ghidradrgn/hooks.py | 249 +++ .../src/main/py/src/ghidradrgn/methods.py | 388 +++++ .../src/main/py/src/ghidradrgn/schema.xml | 183 +++ .../src/main/py/src/ghidradrgn/util.py | 115 ++ .../TraceRmiLauncherServicePlugin.html | 68 + .../drgn/rmi/AbstractDrgnTraceRmiTest.java | 379 +++++ .../java/agent/drgn/rmi/DrgnCommandsTest.java | 909 +++++++++++ .../java/agent/drgn/rmi/DrgnMethodsTest.java | 286 ++++ .../Debugger/B5-AddingDebuggers.html | 148 ++ .../Debugger/B5-AddingDebuggers.md | 224 +++ GhidraDocs/GhidraClass/Debugger/Makefile | 1 + GhidraDocs/GhidraClass/Debugger/navhead.htm | 3 +- GhidraDocs/certification.manifest | 2 + 28 files changed, 4816 insertions(+), 1 deletion(-) create mode 100644 Ghidra/Debug/Debugger-agent-drgn/Module.manifest create mode 100644 Ghidra/Debug/Debugger-agent-drgn/README.md create mode 100644 Ghidra/Debug/Debugger-agent-drgn/build.gradle create mode 100644 
Ghidra/Debug/Debugger-agent-drgn/certification.manifest create mode 100755 Ghidra/Debug/Debugger-agent-drgn/data/debugger-launchers/core-drgn.sh create mode 100755 Ghidra/Debug/Debugger-agent-drgn/data/debugger-launchers/kernel-drgn.sh create mode 100755 Ghidra/Debug/Debugger-agent-drgn/data/debugger-launchers/local-drgn.sh create mode 100644 Ghidra/Debug/Debugger-agent-drgn/data/support/local-drgn.py create mode 100644 Ghidra/Debug/Debugger-agent-drgn/src/main/py/LICENSE create mode 100644 Ghidra/Debug/Debugger-agent-drgn/src/main/py/MANIFEST.in create mode 100644 Ghidra/Debug/Debugger-agent-drgn/src/main/py/README.md create mode 100644 Ghidra/Debug/Debugger-agent-drgn/src/main/py/pyproject.toml create mode 100644 Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/__init__.py create mode 100644 Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/arch.py create mode 100644 Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/commands.py create mode 100644 Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/hooks.py create mode 100644 Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/methods.py create mode 100644 Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/schema.xml create mode 100644 Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/util.py create mode 100644 Ghidra/Test/DebuggerIntegrationTest/src/test.slow/java/agent/drgn/rmi/AbstractDrgnTraceRmiTest.java create mode 100644 Ghidra/Test/DebuggerIntegrationTest/src/test.slow/java/agent/drgn/rmi/DrgnCommandsTest.java create mode 100644 Ghidra/Test/DebuggerIntegrationTest/src/test.slow/java/agent/drgn/rmi/DrgnMethodsTest.java create mode 100644 GhidraDocs/GhidraClass/Debugger/B5-AddingDebuggers.html create mode 100644 GhidraDocs/GhidraClass/Debugger/B5-AddingDebuggers.md diff --git a/Ghidra/Debug/Debugger-agent-drgn/Module.manifest b/Ghidra/Debug/Debugger-agent-drgn/Module.manifest new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/Ghidra/Debug/Debugger-agent-drgn/README.md b/Ghidra/Debug/Debugger-agent-drgn/README.md new file mode 100644 index 00000000000..65c052c5db9 --- /dev/null +++ b/Ghidra/Debug/Debugger-agent-drgn/README.md @@ -0,0 +1 @@ +# Debugger-agent-drgn diff --git a/Ghidra/Debug/Debugger-agent-drgn/build.gradle b/Ghidra/Debug/Debugger-agent-drgn/build.gradle new file mode 100644 index 00000000000..443df89fbf3 --- /dev/null +++ b/Ghidra/Debug/Debugger-agent-drgn/build.gradle @@ -0,0 +1,20 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +apply from: "$rootProject.projectDir/gradle/distributableGhidraModule.gradle" +apply from: "$rootProject.projectDir/gradle/hasPythonPackage.gradle" + +apply plugin: 'eclipse' +eclipse.project.name = 'Debug Debugger-agent-drgn' diff --git a/Ghidra/Debug/Debugger-agent-drgn/certification.manifest b/Ghidra/Debug/Debugger-agent-drgn/certification.manifest new file mode 100644 index 00000000000..d342dc45652 --- /dev/null +++ b/Ghidra/Debug/Debugger-agent-drgn/certification.manifest @@ -0,0 +1,11 @@ +##VERSION: 2.0 +##MODULE IP: Apache License 2.0 +##MODULE IP: Apache License 2.0 with LLVM Exceptions +Module.manifest||GHIDRA||||END| +README.md||GHIDRA||||END| +build.gradle||GHIDRA||||END| +src/main/py/LICENSE||GHIDRA||||END| +src/main/py/MANIFEST.in||GHIDRA||||END| +src/main/py/README.md||GHIDRA||||END| +src/main/py/pyproject.toml||GHIDRA||||END| +src/main/py/src/ghidradrgn/schema.xml||GHIDRA||||END| diff --git a/Ghidra/Debug/Debugger-agent-drgn/data/debugger-launchers/core-drgn.sh b/Ghidra/Debug/Debugger-agent-drgn/data/debugger-launchers/core-drgn.sh new file mode 100755 index 00000000000..e1bb7a2db0f --- /dev/null +++ b/Ghidra/Debug/Debugger-agent-drgn/data/debugger-launchers/core-drgn.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +## ### +# IP: GHIDRA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +## +#@title drgn-core +#@desc +#@desc

Launch with drgn-core

+#@desc

+#@desc This will attach to an existing core dump using drgn. +#@desc For setup instructions, press F1. +#@desc

+#@desc +#@menu-group drgn +#@icon icon.debugger +#@help TraceRmiLauncherServicePlugin#drgn-core +#@env OPT_TARGET_IMG:file!="" "Core dump" "The target core dump" + +export OPT_TARGET_KIND="coredump" +drgn -c "$OPT_TARGET_IMG" ../support/local-drgn.py + diff --git a/Ghidra/Debug/Debugger-agent-drgn/data/debugger-launchers/kernel-drgn.sh b/Ghidra/Debug/Debugger-agent-drgn/data/debugger-launchers/kernel-drgn.sh new file mode 100755 index 00000000000..bf751266d47 --- /dev/null +++ b/Ghidra/Debug/Debugger-agent-drgn/data/debugger-launchers/kernel-drgn.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +## ### +# IP: GHIDRA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +## +#@title drgn-kernel +#@desc +#@desc

Launch with drgn-kernel

+#@desc

+#@desc This will attach to the local machine's kernel using drgn. +#@desc For setup instructions, press F1. +#@desc

+#@desc +#@menu-group drgn +#@icon icon.debugger +#@help TraceRmiLauncherServicePlugin#drgn-kernel + +export OPT_TARGET_KIND="kernel" +sudo -E drgn ../support/local-drgn.py + diff --git a/Ghidra/Debug/Debugger-agent-drgn/data/debugger-launchers/local-drgn.sh b/Ghidra/Debug/Debugger-agent-drgn/data/debugger-launchers/local-drgn.sh new file mode 100755 index 00000000000..edf9d0f94ef --- /dev/null +++ b/Ghidra/Debug/Debugger-agent-drgn/data/debugger-launchers/local-drgn.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +## ### +# IP: GHIDRA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +## +#@title drgn +#@desc +#@desc

Launch with drgn

+#@desc

+#@desc This will attach to a target running on the local machine using drgn. +#@desc For setup instructions, press F1. +#@desc

+#@desc +#@menu-group drgn +#@icon icon.debugger +#@help TraceRmiLauncherServicePlugin#drgn +#@env OPT_TARGET_PID:int=44068 "PID" "The target's process id" + +export OPT_TARGET_KIND="user" +# sudo -E drgn -p "$OPT_TARGET_PID" ../support/local-drgn.py +# or 'echo 0 > /proc/sys/kernel/yama/ptrace_scope' +drgn -p "$OPT_TARGET_PID" ../support/local-drgn.py + diff --git a/Ghidra/Debug/Debugger-agent-drgn/data/support/local-drgn.py b/Ghidra/Debug/Debugger-agent-drgn/data/support/local-drgn.py new file mode 100644 index 00000000000..2d5e97afa8f --- /dev/null +++ b/Ghidra/Debug/Debugger-agent-drgn/data/support/local-drgn.py @@ -0,0 +1,57 @@ +## ### +# IP: GHIDRA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+## + +# From drgn: +# EASY-INSTALL-ENTRY-SCRIPT: 'drgn==0.0.24','console_scripts','drgn' +import os +import re +import sys + +import drgn.cli + +home = os.getenv('GHIDRA_HOME') + +if os.path.isdir(f'{home}/ghidra/.git'): + sys.path.append( + f'{home}/ghidra/Ghidra/Debug/Debugger-agent-drgn/build/pypkg/src') + sys.path.append( + f'{home}/ghidra/Ghidra/Debug/Debugger-rmi-trace/build/pypkg/src') +elif os.path.isdir(f'{home}/.git'): + sys.path.append( + f'{home}/Ghidra/Debug/Debugger-agent-drgn/build/pypkg/src') + sys.path.append( + f'{home}/Ghidra/Debug/Debugger-rmi-trace/build/pypkg/src') +else: + sys.path.append( + f'{home}/Ghidra/Debug/Debugger-agent-drgn/pypkg/src') + sys.path.append(f'{home}/Ghidra/Debug/Debugger-rmi-trace/pypkg/src') + + +def main(): + from ghidradrgn import commands as cmd + cmd.ghidra_trace_connect(address=os.getenv('GHIDRA_TRACE_RMI_ADDR')) + cmd.ghidra_trace_create(start_trace=True) + cmd.ghidra_trace_txstart() + cmd.ghidra_trace_put_all() + cmd.ghidra_trace_txcommit() + cmd.ghidra_trace_activate() + drgn.cli.run_interactive(cmd.prog) + + +if __name__ == '__main__': + main() + + diff --git a/Ghidra/Debug/Debugger-agent-drgn/src/main/py/LICENSE b/Ghidra/Debug/Debugger-agent-drgn/src/main/py/LICENSE new file mode 100644 index 00000000000..c026b6b79a3 --- /dev/null +++ b/Ghidra/Debug/Debugger-agent-drgn/src/main/py/LICENSE @@ -0,0 +1,11 @@ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/Ghidra/Debug/Debugger-agent-drgn/src/main/py/MANIFEST.in b/Ghidra/Debug/Debugger-agent-drgn/src/main/py/MANIFEST.in new file mode 100644 index 00000000000..0fc1562e1d4 --- /dev/null +++ b/Ghidra/Debug/Debugger-agent-drgn/src/main/py/MANIFEST.in @@ -0,0 +1 @@ +include src/ghidradrgn/schema.xml \ No newline at end of file diff --git a/Ghidra/Debug/Debugger-agent-drgn/src/main/py/README.md b/Ghidra/Debug/Debugger-agent-drgn/src/main/py/README.md new file mode 100644 index 00000000000..ba7656544f7 --- /dev/null +++ b/Ghidra/Debug/Debugger-agent-drgn/src/main/py/README.md @@ -0,0 +1,3 @@ +# Ghidra Trace RMI for drgn + +Package for connecting drgn to Ghidra via Trace RMI. diff --git a/Ghidra/Debug/Debugger-agent-drgn/src/main/py/pyproject.toml b/Ghidra/Debug/Debugger-agent-drgn/src/main/py/pyproject.toml new file mode 100644 index 00000000000..516a2ffc324 --- /dev/null +++ b/Ghidra/Debug/Debugger-agent-drgn/src/main/py/pyproject.toml @@ -0,0 +1,25 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "ghidradrgn" +version = "11.3" +authors = [ + { name="Ghidra Development Team" }, +] +description = "Ghidra's Plugin for drgn" +readme = "README.md" +requires-python = ">=3.7" +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", +] +dependencies = [ + "ghidratrace==11.3", +] + +[project.urls] +"Homepage" = "https://github.com/NationalSecurityAgency/ghidra" +"Bug Tracker" = "https://github.com/NationalSecurityAgency/ghidra/issues" diff --git a/Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/__init__.py b/Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/__init__.py new file mode 100644 index 00000000000..7e7e1e10534 --- /dev/null +++ b/Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/__init__.py @@ -0,0 +1,16 @@ +## ### +# IP: GHIDRA +# +# Licensed under the Apache License, 
Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +## +from . import util, commands diff --git a/Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/arch.py b/Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/arch.py new file mode 100644 index 00000000000..bb4e9e6ee08 --- /dev/null +++ b/Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/arch.py @@ -0,0 +1,209 @@ +## ### +# IP: GHIDRA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +## +from ghidratrace.client import Address, RegVal +import drgn + +from . 
import util + + +# NOTE: This map is derived from the ldefs using a script +language_map = { + 'AARCH64': ['AARCH64:BE:64:v8A', 'AARCH64:LE:64:AppleSilicon', 'AARCH64:LE:64:v8A'], + 'ARM': ['ARM:BE:32:v8', 'ARM:BE:32:v8T', 'ARM:LE:32:v8', 'ARM:LE:32:v8T'], + 'PPC64': ['PowerPC:BE:64:4xx', 'PowerPC:LE:64:4xx'], + 'S390': [], + 'S390X': [], + 'I386': ['x86:LE:32:default'], + 'X86_64': ['x86:LE:64:default'], + 'UNKNOWN': ['DATA:LE:64:default', 'DATA:LE:64:default'], +} + +data64_compiler_map = { + None: 'pointer64', +} + +default_compiler_map = { + 'Language.C': 'default', +} + +x86_compiler_map = { + 'Language.C': 'gcc', +} + +compiler_map = { + 'DATA:BE:64:': data64_compiler_map, + 'DATA:LE:64:': data64_compiler_map, + 'x86:LE:32:': x86_compiler_map, + 'x86:LE:64:': x86_compiler_map, + 'AARCH64:LE:64:': default_compiler_map, + 'ARM:BE:32:': default_compiler_map, + 'ARM:LE:32:': default_compiler_map, + 'PowerPC:BE:64:': default_compiler_map, + 'PowerPC:LE:64:': default_compiler_map, +} + + +def get_arch(): + platform = drgn.host_platform + return platform.arch.name + + +def get_endian(): + parm = util.get_convenience_variable('endian') + if parm != 'auto': + return parm + platform = drgn.host_platform + order = platform.flags.IS_LITTLE_ENDIAN + if order.value > 0: + return 'little' + else: + return 'big' + + +def get_size(): + parm = util.get_convenience_variable('size') + if parm != 'auto': + return parm + platform = drgn.host_platform + order = platform.flags.IS_64_BIT + if order.value > 0: + return '64' + else: + return '32' + + +def get_osabi(): + return "Language.C" + + +def compute_ghidra_language(): + # First, check if the parameter is set + lang = util.get_convenience_variable('ghidra-language') + if lang != 'auto': + return lang + + # Get the list of possible languages for the arch. We'll need to sift + # through them by endian and probably prefer default/simpler variants. The + # heuristic for "simpler" will be 'default' then shortest variant id. 
+ arch = get_arch() + endian = get_endian() + sz = get_size() + lebe = ':BE:' if endian == 'big' else ':LE:' + if not arch in language_map: + return 'DATA' + lebe + sz +':default' + langs = language_map[arch] + matched_endian = sorted( + (l for l in langs if lebe in l), + key=lambda l: 0 if l.endswith(':default') else len(l) + ) + if len(matched_endian) > 0: + return matched_endian[0] + # NOTE: I'm disinclined to fall back to a language match with wrong endian. + return 'DATA' + lebe + sz + ':default' + + +def compute_ghidra_compiler(lang): + # First, check if the parameter is set + comp = util.get_convenience_variable('ghidra-compiler') + if comp != 'auto': + return comp + + # Check if the selected lang has specific compiler recommendations + matched_lang = sorted( + (l for l in compiler_map if l in lang), +# key=lambda l: compiler_map[l] + ) + if len(matched_lang) == 0: + print(f"{lang} not found in compiler map - using default compiler") + return 'default' + + comp_map = compiler_map[matched_lang[0]] + if comp_map == data64_compiler_map: + print(f"Using the DATA64 compiler map") + osabi = get_osabi() + if osabi in comp_map: + return comp_map[osabi] + if lang.startswith("x86:"): + print(f"{osabi} not found in compiler map - using gcc") + return 'gcc' + if None in comp_map: + return comp_map[None] + print(f"{osabi} not found in compiler map - using default compiler") + return 'default' + + +def compute_ghidra_lcsp(): + lang = compute_ghidra_language() + comp = compute_ghidra_compiler(lang) + return lang, comp + + +class DefaultMemoryMapper(object): + + def __init__(self, defaultSpace): + self.defaultSpace = defaultSpace + + def map(self, proc: drgn.Program, offset: int): + space = self.defaultSpace + return self.defaultSpace, Address(space, offset) + + def map_back(self, proc: drgn.Program, address: Address) -> int: + if address.space == self.defaultSpace: + return address.offset + raise ValueError( + f"Address {address} is not in process {proc}") + + 
+DEFAULT_MEMORY_MAPPER = DefaultMemoryMapper('ram') + +memory_mappers = {} + + +def compute_memory_mapper(lang): + if not lang in memory_mappers: + return DEFAULT_MEMORY_MAPPER + return memory_mappers[lang] + + +class DefaultRegisterMapper(object): + + def __init__(self, byte_order): + if not byte_order in ['big', 'little']: + raise ValueError("Invalid byte_order: {}".format(byte_order)) + self.byte_order = byte_order + self.union_winners = {} + + def map_name(self, proc, name): + return name + + def map_value(self, proc, name, value): + return RegVal(self.map_name(proc, name), value) + + def map_name_back(self, proc, name): + return name + + def map_value_back(self, proc, name, value): + return RegVal(self.map_name_back(proc, name), value) + + +DEFAULT_BE_REGISTER_MAPPER = DefaultRegisterMapper('big') +DEFAULT_LE_REGISTER_MAPPER = DefaultRegisterMapper('little') + +def compute_register_mapper(lang): + if ':BE:' in lang: + return DEFAULT_BE_REGISTER_MAPPER + else: + return DEFAULT_LE_REGISTER_MAPPER diff --git a/Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/commands.py b/Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/commands.py new file mode 100644 index 00000000000..8f308ce4523 --- /dev/null +++ b/Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/commands.py @@ -0,0 +1,1411 @@ +## ### +# IP: GHIDRA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+## +import code +from contextlib import contextmanager +import inspect +import os.path +import re +import socket +import sys +import time + +import drgn +import drgn.cli +from ghidratrace import sch +from ghidratrace.client import Client, Address, AddressRange, TraceObject + +from . import util, arch, methods, hooks + + +PAGE_SIZE = 4096 + +AVAILABLES_PATH = 'Available' +AVAILABLE_KEY_PATTERN = '[{pid}]' +AVAILABLE_PATTERN = AVAILABLES_PATH + AVAILABLE_KEY_PATTERN +PROCESSES_PATH = 'Processes' +PROCESS_KEY_PATTERN = '[{procnum}]' +PROCESS_PATTERN = PROCESSES_PATH + PROCESS_KEY_PATTERN +ENV_PATTERN = PROCESS_PATTERN + '.Environment' +THREADS_PATTERN = PROCESS_PATTERN + '.Threads' +THREAD_KEY_PATTERN = '[{tnum}]' +THREAD_PATTERN = THREADS_PATTERN + THREAD_KEY_PATTERN +STACK_PATTERN = THREAD_PATTERN + '.Stack' +FRAME_KEY_PATTERN = '[{level}]' +FRAME_PATTERN = STACK_PATTERN + FRAME_KEY_PATTERN +REGS_PATTERN = FRAME_PATTERN + '.Registers' +LOCALS_PATTERN = FRAME_PATTERN + '.Locals' +MEMORY_PATTERN = PROCESS_PATTERN + '.Memory' +REGION_KEY_PATTERN = '[{start:08x}]' +REGION_PATTERN = MEMORY_PATTERN + REGION_KEY_PATTERN +MODULES_PATTERN = PROCESS_PATTERN + '.Modules' +MODULE_KEY_PATTERN = '[{modpath}]' +MODULE_PATTERN = MODULES_PATTERN + MODULE_KEY_PATTERN +SECTIONS_PATTERN = MODULE_PATTERN + '.Sections' +SECTION_KEY_PATTERN = '[{secname}]' +SECTION_PATTERN = SECTIONS_PATTERN + SECTION_KEY_PATTERN +SYMBOLS_PATTERN = PROCESS_PATTERN + '.Symbols' +SYMBOL_KEY_PATTERN = '[{sid}]' +SYMBOL_PATTERN = SYMBOLS_PATTERN + SYMBOL_KEY_PATTERN + +PROGRAMS = {} + +class ErrorWithCode(Exception): + + def __init__(self, code): + self.code = code + + def __str__(self) -> str: + return repr(self.code) + + +class State(object): + + def __init__(self): + self.reset_client() + + def require_client(self): + if self.client is None: + raise RuntimeError("Not connected") + return self.client + + def require_no_client(self): + if self.client != None: + raise RuntimeError("Already connected") + + 
def ghidra_trace_listen(address='0.0.0.0:0'):
    """
    Listen for Ghidra to connect for tracing

    Takes an optional address for the host and port on which to listen. Either
    the form 'host:port' or just 'port'. If omitted, it will bind to an
    ephemeral port on all interfaces. If only the port is given, it will bind to
    that port on all interfaces. This command will block until the connection is
    established.

    Raises RuntimeError if a client is already connected, if the address is
    malformed, or if the port is not numeric.
    """

    STATE.require_no_client()
    parts = address.split(':')
    if len(parts) == 1:
        host, port = '0.0.0.0', parts[0]
    elif len(parts) == 2:
        host, port = parts
    else:
        raise RuntimeError("address must be 'port' or 'host:port'")

    # Only the conversion can legitimately produce "port must be numeric";
    # keep the try narrow so unrelated ValueErrors are not misreported.
    try:
        port = int(port)
    except ValueError:
        raise RuntimeError("port must be numeric")

    # The listening socket is needed only until the first connection arrives.
    # The with-block closes it even when bind/listen/accept raises.
    with socket.socket() as s:
        s.bind((host, port))
        host, port = s.getsockname()
        s.listen(1)
        print("Listening at {}:{}...".format(host, port))
        c, (chost, cport) = s.accept()
    print("Connection from {}:{}".format(chost, cport))
    # Describe this agent the same way ghidra_trace_connect does.
    STATE.client = Client(c, "drgn", methods.REGISTRY)
def ghidra_trace_restart(name=None):
    """
    Restart or start the Trace in Ghidra

    Requires a connected client. If a trace is currently active it is closed
    and the trace state is reset before a new trace is started under 'name'.
    """

    STATE.require_client()
    # Identity comparison: None is a singleton and the trace object's own
    # comparison operators should not be consulted here.
    if STATE.trace is not None:
        STATE.trace.close()
        STATE.reset_trace()
    start_trace(name)
def ghidra_trace_info():
    """Print the state of the Ghidra connection and whether a trace is active"""

    client = STATE.client
    if client is None:
        print("Not connected to Ghidra")
        return
    # The peer address is read from the client's underlying socket.
    peer_host, peer_port = client.s.getpeername()
    print(f"Connected to {client.description} at {peer_host}:{peer_port}")
    print("No trace" if STATE.trace is None else "Trace active")
@contextmanager
def open_tracked_tx(description):
    """
    Open a trace transaction and track it in STATE for the managed block.

    Yields the transaction opened on the required trace. STATE.tx is set
    while the block runs and is reset when the block exits, whether it
    completes normally or raises.

    Raises RuntimeError (from require_trace) if no trace is active.
    """
    with STATE.require_trace().open_tx(description) as tx:
        STATE.tx = tx
        try:
            yield tx
        finally:
            # Without the finally, an exception in the managed block would
            # leave a stale transaction recorded in STATE.
            STATE.reset_tx()
def putmem_state(address, length, state, pages=True):
    """
    Set the memory state for a range in the current trace.

    The range [address, address+length) is optionally widened to whole pages,
    mapped through the trace's memory mapper, and marked with 'state'. An
    overlay space is created for a non-base space unless the state is
    'unknown'.
    """
    trace = STATE.trace
    trace.validate_state(state)
    lo, hi = eval_range(address, length)
    if pages:
        lo, hi = quantize_pages(lo, hi)
    base, mapped = trace.memory_mapper.map(util.selected_process(), lo)
    in_overlay = base != mapped.space
    if in_overlay and state != 'unknown':
        trace.create_overlay_space(base, mapped.space)
    trace.set_memory_state(mapped.extend(hi - lo), state)
def putreg():
    """
    Record the registers of the selected frame into the trace.

    Creates the register container object and overlay space for the selected
    process/thread/frame, converts each register value reported by drgn to
    bytes using the architecture's size and endianness, and writes them with
    put_registers.

    Returns {'missing': ...} with the result of put_registers, or None when
    nothing is selected or the stack trace cannot be obtained.
    """
    nproc = util.selected_process()
    nthrd = util.selected_thread()
    nframe = util.selected_frame()
    # Other commands in this file treat None as "nothing selected"; guard for
    # it here too so that the '< 0' comparison cannot raise TypeError.
    for selected in (nproc, nthrd, nframe):
        if selected is None or selected < 0:
            return
    space = REGS_PATTERN.format(procnum=nproc, tnum=nthrd, level=nframe)
    STATE.trace.create_overlay_space('register', space)
    robj = STATE.trace.create_object(space)
    robj.insert()
    mapper = STATE.trace.register_mapper

    thread = prog.thread(nthrd)
    try:
        frames = thread.stack_trace()
    except Exception as e:
        print(e)
        return

    regs = frames[nframe].registers()
    endian = arch.get_endian()
    # Register width in bytes; integer division avoids a float round-trip.
    sz = int(arch.get_size()) // 8
    values = []
    for key, value in regs.items():
        try:
            rv = value.to_bytes(sz, endian)
            values.append(mapper.map_value(nproc, key, rv))
            robj.set_value(key, hex(value))
        except Exception:
            # Best effort: a register that cannot be encoded or stored is
            # skipped so the remaining registers are still recorded.
            pass
    return {'missing': STATE.trace.put_registers(space, values)}
+ """ + + STATE.require_tx() + nproc = util.selected_process() + nthrd = util.selected_thread() + if nthrd < 0: + return + nframe = util.selected_frame() + if nframe < 0: + return + space = REGS_PATTERN.format(procnum=nproc, tnum=nthrd, level=nframe) + + thread = prog.thread(nthrd) + try: + frames = thread.stack_trace() + except Exception as e: + print(e) + return + + regs = frames[nframe].registers() + names = [] + for key in regs.keys(): + names.append(key) + STATE.trace.delete_registers(space, names) + + +def put_object(lpath, key, value): + nproc = util.selected_process() + lobj = STATE.trace.create_object(lpath+"."+key) + lobj.insert() + if hasattr(value, "type_"): + vtype = value.type_ + vkind = vtype.kind + lobj.set_value('_display', '{} [{}]'.format(key, vtype.type_name())) + lobj.set_value('Kind', str(vkind)) + lobj.set_value('Type', str(vtype)) + else: + lobj.set_value('_display', '{} [{}:{}]'.format(key, type(value), str(value))) + lobj.set_value('Value', str(value)) + return + + if hasattr(value, "absent_"): + if value.absent_: + lobj.set_value('Value', '') + return + if hasattr(value, "address_"): + vaddr = value.address_ + if vaddr is not None: + base, addr = STATE.trace.memory_mapper.map(nproc, vaddr) + lobj.set_value('Address', addr) + if hasattr(value, "value_"): + vvalue = value.value_() + + if vkind is drgn.TypeKind.POINTER: + base, addr = STATE.trace.memory_mapper.map(nproc, vvalue) + lobj.set_value('Address', addr) + return + if vkind is drgn.TypeKind.TYPEDEF: + lobj.set_value('_display', '{} [{}:{}]'.format(key, type(vvalue), str(vvalue))) + lobj.set_value('Value', str(vvalue)) + return + if vkind is drgn.TypeKind.UNION or vkind is drgn.TypeKind.STRUCT or vkind is drgn.TypeKind.CLASS: + for k in vvalue.keys(): + put_object(lobj.path+'.Members', k, vvalue[k]) + return + + lobj.set_value('_display', '{} [{}:{}]'.format(key, type(vvalue), str(vvalue))) + lobj.set_value('Value', str(vvalue)) + + +def put_objects(pobj, parent): + ppath = pobj.path 
def put_locals():
    """
    Record the local variables of the selected frame into the trace.

    Creates the Locals container for the selected process/thread/frame and
    calls put_object for every name reported by the frame's locals().
    Returns without doing anything when no process, thread or frame is
    selected.
    """
    nproc = util.selected_process()
    nthrd = util.selected_thread()
    nframe = util.selected_frame()
    # Other commands in this file treat None as "nothing selected"; guard for
    # it here too so that the '< 0' comparison cannot raise TypeError.
    for selected in (nproc, nthrd, nframe):
        if selected is None or selected < 0:
            return
    lpath = LOCALS_PATTERN.format(procnum=nproc, tnum=nthrd, level=nframe)
    lobj = STATE.trace.create_object(lpath)
    lobj.insert()

    thread = prog.thread(nthrd)
    frames = thread.stack_trace()
    frame = frames[nframe]
    for key in frame.locals():
        # The frame is indexed by variable name to obtain the value object.
        put_object(lpath, key, frame[key])
def ghidra_trace_set_value(path: str, key: str, value, schema=None):
    """
    Set a value (attribute or element) in the Ghidra trace's object tree.

    A void value implies removal.

    If a schema name is given it is converted to a sch.Schema. For the OBJECT
    schema, 'value' is taken as the path of another trace object. Otherwise
    the value is converted by eval_value according to the schema; for an
    ADDRESS the overlay space is created when the mapped address is not in
    the base space.

    Requires an open transaction.
    """
    schema = None if schema is None else sch.Schema(schema)
    STATE.require_tx()
    if schema == sch.OBJECT:
        val = STATE.trace.proxy_object_path(value)
    else:
        val, schema = eval_value(value, schema)
        if schema == sch.ADDRESS:
            base, addr = val
            val = addr
            if base != addr.space:
                # There is no module-level 'trace'; the active trace lives
                # on STATE.
                STATE.trace.create_overlay_space(base, addr.space)
    STATE.trace.proxy_object_path(path).set_value(key, val, schema)
class TableColumn(object):
    """
    One column of a text table: a header followed by stringified cells.

    Call finish() once all data has been added to compute the padded width,
    then print_cell(i) to print row i (row 0 is the header).
    """

    def __init__(self, head):
        self.head = head
        self.contents = [head]
        self.is_last = False
        # Defined up front so print_cell works (unpadded) even if finish()
        # has not been called yet.
        self.width = 0

    def add_data(self, data):
        """Append a cell, converted to str."""
        self.contents.append(str(data))

    def finish(self):
        """Compute the column width: longest cell plus one separating space."""
        self.width = max(len(d) for d in self.contents) + 1

    def print_cell(self, i):
        """Print cell i without a newline; the last column is not padded."""
        cell = self.contents[i]
        print(cell if self.is_last else cell.ljust(self.width), end='')
def ghidra_trace_disassemble(address):
    """
    Disassemble starting at the given seed.

    Disassembly proceeds linearly and terminates at the first branch or unknown
    memory encountered.

    Requires an open transaction. Prints the number of bytes disassembled.
    """

    STATE.require_tx()
    nproc = util.selected_process()
    base, addr = STATE.trace.memory_mapper.map(nproc, address)
    if base != addr.space:
        # There is no module-level 'trace'; the active trace lives on STATE.
        STATE.trace.create_overlay_space(base, addr.space)

    length = STATE.trace.disassemble(addr)
    print("Disassembled {} bytes".format(length))
def put_regions():
    """
    Record one memory region per loaded module into the trace.

    Regions are derived from prog.loaded_modules(); each entry's first
    element is the module, whose address_range gives the region bounds.
    Regions no longer reported are dropped via retain_values. Does nothing
    when no process is selected.
    """
    nproc = util.selected_process()
    if nproc is None:
        return

    try:
        regions = prog.loaded_modules()
    except Exception:
        regions = []

    keys = []
    for r in regions:
        module = r[0]
        # NOTE(review): drgn documents address_range as optional with an
        # exclusive end -- confirm against the drgn version in use.
        if module.address_range is None:
            continue
        start, end = module.address_range
        # Same size computation as put_modules: the end is exclusive, so the
        # length is end - start (no +1).
        size = end - start
        if size <= 0:
            continue
        rpath = REGION_PATTERN.format(procnum=nproc, start=start)
        keys.append(REGION_KEY_PATTERN.format(start=start))
        regobj = STATE.trace.create_object(rpath)
        (start_base, start_addr) = map_address(start)
        regobj.set_value('Range', start_addr.extend(size))
        regobj.set_value('Name', module.name)
        regobj.set_value('Object File', module.loaded_file_path)
        # Permissions are not derived from the module; every region is
        # reported as readable, writable and executable.
        regobj.set_value('_readable', True)
        regobj.set_value('_writable', True)
        regobj.set_value('_executable', True)
        regobj.set_value('_display', '{:x} {}'.format(start, module.name))
        regobj.insert()
    STATE.trace.proxy_object_path(
        MEMORY_PATTERN.format(procnum=nproc)).retain_values(keys)
except Exception as e: + return + + mapper = STATE.trace.memory_mapper + mod_keys = [] + for m in modules: + name = m.name + # m[1] : _DEBUG_MODULE_PARAMETERS + base = m.address_range[0] + hbase = hex(base) + size = m.address_range[1] - base + mpath = MODULE_PATTERN.format(procnum=nproc, modpath=hbase) + modobj = STATE.trace.create_object(mpath) + mod_keys.append(MODULE_KEY_PATTERN.format(modpath=hbase)) + base_base, base_addr = mapper.map(nproc, base) + if base_base != base_addr.space: + STATE.trace.create_overlay_space(base_base, base_addr.space) + modobj.set_value('Range', base_addr.extend(size)) + modobj.set_value('Name', name) + modobj.set_value('_display', '{:x} {}'.format(base, name)) + modobj.insert() + attrobj = STATE.trace.create_object(mpath+".Attributes") + attrobj.set_value('BuildId', m.build_id) + attrobj.set_value('DebugBias', m.debug_file_bias) + attrobj.set_value('DebugPath', m.debug_file_path) + attrobj.set_value('DebugStatus', str(m.debug_file_status)) + attrobj.set_value('LoadBias', m.loaded_file_bias) + attrobj.set_value('LoadPath', m.loaded_file_path) + attrobj.set_value('LoadStatus', str(m.loaded_file_status)) + attrobj.insert() + if type(m) == drgn.RelocatableModule: + secobj = STATE.trace.create_object(mpath+".Sections") + secobj.insert() + STATE.trace.proxy_object_path(MODULES_PATTERN.format( + procnum=nproc)).retain_values(mod_keys) + + +def ghidra_trace_put_modules(): + """ + Gather object files, if applicable, and write to the trace's Modules + """ + + STATE.require_tx() + with STATE.client.batch() as b: + put_modules() + + + +def put_sections(m : drgn.RelocatableModule): + nproc = util.selected_process() + if nproc is None: + return + + mapper = STATE.trace.memory_mapper + section_keys = [] + sections = m.section_addresses + maddr = hex(m.address_range[0]) + for key in sections.keys(): + value = sections[key] + spath = SECTION_PATTERN.format(procnum=nproc, modpath=maddr, secname=key) + sobj = STATE.trace.create_object(spath) + 
def put_frames():
    """
    Record the stack frames of the selected thread into the trace.

    For every frame of the thread's stack trace this creates the frame object
    (PC, SP, Name), its Attributes, empty Locals and Registers containers
    and, when available, a Source object. Frames no longer present are
    dropped via retain_values. Does nothing when no process or thread is
    selected or the stack trace cannot be obtained.
    """
    nproc = util.selected_process()
    # Other commands treat None as "nothing selected"; guard for it so the
    # '< 0' comparison cannot raise TypeError.
    if nproc is None or nproc < 0:
        return
    nthrd = util.selected_thread()
    if nthrd is None:
        return
    thread = prog.thread(nthrd)
    if thread is None:
        return

    try:
        stack = thread.stack_trace()
    except Exception:
        return

    mapper = STATE.trace.memory_mapper
    keys = []
    for i, f in enumerate(stack):
        fpath = FRAME_PATTERN.format(
            procnum=nproc, tnum=nthrd, level=i)
        fobj = STATE.trace.create_object(fpath)
        keys.append(FRAME_KEY_PATTERN.format(level=i))
        base, offset_inst = mapper.map(nproc, f.pc)
        if base != offset_inst.space:
            STATE.trace.create_overlay_space(base, offset_inst.space)
        base, offset_stack = mapper.map(nproc, f.sp)
        if base != offset_stack.space:
            STATE.trace.create_overlay_space(base, offset_stack.space)
        fobj.set_value('PC', offset_inst)
        fobj.set_value('SP', offset_stack)
        fobj.set_value('Name', f.name)
        fobj.set_value('_display', "#{} {} {}".format(i, hex(offset_inst.offset), f.name))
        fobj.insert()
        # Set the attributes first and insert once, like every other object
        # created in this function.
        aobj = STATE.trace.create_object(fpath+".Attributes")
        aobj.set_value('Inline', f.is_inline)
        aobj.set_value('Interrupted', f.interrupted)
        aobj.insert()
        lobj = STATE.trace.create_object(fpath+".Locals")
        lobj.insert()
        robj = STATE.trace.create_object(fpath+".Registers")
        robj.insert()
        try:
            src = f.source()
            srcobj = STATE.trace.create_object(fpath+".Source")
            srcobj.set_value('Filename', src[0])
            srcobj.set_value('Line', src[1])
            srcobj.set_value('Column', src[2])
            srcobj.insert()
        except Exception:
            # Best effort: source information is optional, so a frame
            # without it is recorded without a Source object.
            pass
    STATE.trace.proxy_object_path(STACK_PATTERN.format(
        procnum=nproc, tnum=nthrd)).retain_values(keys)
def ghidra_trace_put_symbols():
    """
    Put the current process's symbols into the Ghidra trace

    Requires an open transaction; the work is done by put_symbols() inside a
    client batch.
    """

    STATE.require_tx()
    with STATE.client.batch() as b:
        put_symbols()
def ghidra_trace_sync_disable():
    """
    Cease synchronizing the current process with the Ghidra trace

    This is the opposite of 'ghidra_trace_sync_enable', except it will not
    automatically remove hooks.
    """

    hooks.disable_current_process()
def get_pc():
    """
    Return the program counter of the selected frame.

    Returns 0 when the selected thread or its stack trace cannot be obtained.
    """
    try:
        frames = prog.thread(util.selected_thread()).stack_trace()
    except Exception:
        return 0

    # Indexing happens outside the try, so a bad frame selection still raises.
    return frames[util.selected_frame()].pc
import commands, util + + +ALL_EVENTS = 0xFFFF + + +class HookState(object): + __slots__ = ('installed', 'mem_catchpoint') + + def __init__(self): + self.installed = False + self.mem_catchpoint = None + + +class ProcessState(object): + __slots__ = ('first', 'regions', 'modules', 'threads', + 'breaks', 'watches', 'visited') + + def __init__(self): + self.first = True + # For things we can detect changes to between stops + self.regions = False + self.modules = False + self.threads = False + self.breaks = False + self.watches = False + # For frames and threads that have already been synced since last stop + self.visited = set() + + def record(self, description=None): + first = self.first + self.first = False + if description is not None: + commands.STATE.trace.snapshot(description) + if first: + commands.put_processes() + commands.put_environment() + if self.threads: + commands.put_threads() + self.threads = False + nthrd = util.selected_thread() + if nthrd is not None: + if first or nthrd not in self.visited: + commands.put_frames() + self.visited.add(nthrd) + level = util.selected_frame() + hashable_frame = (nthrd, level) + if first or hashable_frame not in self.visited: + commands.putreg() + try: + commands.putmem(commands.get_pc(), 1, True, True) + except BaseException as e: + print(f"Couldn't record page with PC: {e}") + try: + commands.putmem(commands.get_sp(), 1, True, True) + except BaseException as e: + print(f"Couldn't record page with SP: {e}") + self.visited.add(hashable_frame) + if first or self.regions or self.modules: + # Sections, memory syscalls, or stack allocations + commands.put_regions() + self.regions = False + if first or self.modules: + commands.put_modules() + self.modules = False + + def record_continued(self): + commands.put_processes() + commands.put_threads() + + def record_exited(self, exit_code): + nproc = util.selected_process() + ipath = commands.PROCESS_PATTERN.format(procnum=nproc) + procobj = 
commands.STATE.trace.proxy_object_path(ipath) + procobj.set_value('Exit Code', exit_code) + procobj.set_value('State', 'TERMINATED') + + +HOOK_STATE = HookState() +PROC_STATE = {} + +def on_new_process(event): + trace = commands.STATE.trace + if trace is None: + return + with commands.STATE.client.batch(): + with trace.open_tx("New Process {}".format(event.process.num)): + commands.put_processes() # TODO: Could put just the one.... + + +def on_process_selected(): + nproc = util.selected_process() + if nproc not in PROC_STATE: + return + trace = commands.STATE.trace + if trace is None: + return + with commands.STATE.client.batch(): + with trace.open_tx("Process {} selected".format(nproc)): + PROC_STATE[nproc].record() + commands.activate() + + +def on_new_thread(event): + nproc = util.selected_process() + if nproc not in PROC_STATE: + return + PROC_STATE[nproc].threads = True + + +def on_thread_selected(): + nproc = util.selected_process() + if nproc not in PROC_STATE: + return + trace = commands.STATE.trace + if trace is None: + return + nthrd = util.selected_thread() + with commands.STATE.client.batch(): + with trace.open_tx("Thread {}.{} selected".format(nproc, nthrd)): + PROC_STATE[nproc].record() + commands.put_threads() + commands.activate() + + +def on_frame_selected(): + nproc = util.selected_process() + if nproc not in PROC_STATE: + return + trace = commands.STATE.trace + if trace is None: + return + nthrd = util.selected_thread() + level = util.selected_frame() + with commands.STATE.client.batch(): + with trace.open_tx("Frame {}.{}.{} selected".format(nproc, nthrd, level)): + PROC_STATE[nproc].record() + commands.put_threads() + commands.put_frames() + commands.activate() + + +def on_memory_changed(event): + nproc = util.get_process() + if nproc not in PROC_STATE: + return + trace = commands.STATE.trace + if trace is None: + return + with commands.STATE.client.batch(): + with trace.open_tx("Memory *0x{:08x} changed".format(event.address)): + 
commands.put_bytes(event.address, event.address + event.length, + pages=False, is_mi=False, result=None) + + +def on_register_changed(event): + nproc = util.get_process() + if nproc not in PROC_STATE: + return + trace = commands.STATE.trace + if trace is None: + return + with commands.STATE.client.batch(): + with trace.open_tx("Register {} changed".format(event.regnum)): + commands.putreg() + + +def on_cont(event): + nproc = util.selected_process() + if nproc not in PROC_STATE: + return + trace = commands.STATE.trace + if trace is None: + return + state = PROC_STATE[nproc] + with commands.STATE.client.batch(): + with trace.open_tx("Continued"): + state.record_continued() + + +def on_stop(event): + nproc = util.selected_process() + if nproc not in PROC_STATE: + PROC_STATE[nproc] = ProcessState() + trace = commands.STATE.trace + if trace is None: + print("no trace") + return + state = PROC_STATE[nproc] + state.visited.clear() + with commands.STATE.client.batch(): + with trace.open_tx("Stopped"): + state.record("Stopped") + commands.put_threads() + commands.put_frames() + commands.activate() + + +def modules_changed(): + nproc = util.selected_process() + if nproc not in PROC_STATE: + return + PROC_STATE[nproc].modules = True + + +def install_hooks(): + if HOOK_STATE.installed: + return + HOOK_STATE.installed = True + + event_thread = EventThread() + event_thread.start() + + +def remove_hooks(): + if not HOOK_STATE.installed: + return + HOOK_STATE.installed = False + + +def enable_current_process(): + nproc = util.selected_process() + PROC_STATE[nproc] = ProcessState() + + +def disable_current_process(): + nproc = util.selected_process() + if nproc in PROC_STATE: + del PROC_STATE[nproc] diff --git a/Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/methods.py b/Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/methods.py new file mode 100644 index 00000000000..f6fce47484e --- /dev/null +++ 
b/Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/methods.py @@ -0,0 +1,388 @@ +## ### +# IP: GHIDRA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +## +from concurrent.futures import Future, ThreadPoolExecutor +from contextlib import redirect_stdout +from io import StringIO +import re +import sys +import time + +import drgn +import drgn.cli + +from ghidratrace import sch +from ghidratrace.client import MethodRegistry, ParamDesc, Address, AddressRange + +from . import util, commands, hooks + + +REGISTRY = MethodRegistry(ThreadPoolExecutor( + max_workers=1, thread_name_prefix='MethodRegistry')) + + +def extre(base, ext): + return re.compile(base.pattern + ext) + + +PROCESSES_PATTERN = re.compile('Processes') +PROCESS_PATTERN = extre(PROCESSES_PATTERN, '\[(?P\\d*)\]') +ENV_PATTERN = extre(PROCESS_PATTERN, '\.Environment') +THREADS_PATTERN = extre(PROCESS_PATTERN, '\.Threads') +THREAD_PATTERN = extre(THREADS_PATTERN, '\[(?P\\d*)\]') +STACK_PATTERN = extre(THREAD_PATTERN, '\.Stack') +FRAME_PATTERN = extre(STACK_PATTERN, '\[(?P\\d*)\]') +REGS_PATTERN = extre(FRAME_PATTERN, '.Registers') +LOCALS_PATTERN = extre(FRAME_PATTERN, '.Locals') +MEMORY_PATTERN = extre(PROCESS_PATTERN, '\.Memory') +MODULES_PATTERN = extre(PROCESS_PATTERN, '\.Modules') +MODULE_PATTERN = extre(MODULES_PATTERN, '\[(?P.*)\]') + + +def find_availpid_by_pattern(pattern, object, err_msg): + mat = pattern.fullmatch(object.path) + if mat is None: + raise TypeError(f"{object} is not {err_msg}") + 
pid = int(mat['pid']) + return pid + + +def find_availpid_by_obj(object): + return find_availpid_by_pattern(AVAILABLE_PATTERN, object, "an Available") + + +def find_proc_by_num(id): + if id != util.selected_process(): + util.select_process(id) + return util.selected_process() + + +def find_proc_by_pattern(object, pattern, err_msg): + mat = pattern.fullmatch(object.path) + if mat is None: + raise TypeError(f"{object} is not {err_msg}") + procnum = int(mat['procnum']) + return find_proc_by_num(procnum) + + +def find_proc_by_obj(object): + return find_proc_by_pattern(object, PROCESS_PATTERN, "an Process") + + +def find_proc_by_env_obj(object): + return find_proc_by_pattern(object, ENV_PATTERN, "an Environment") + + +def find_proc_by_threads_obj(object): + return find_proc_by_pattern(object, THREADS_PATTERN, "a ThreadContainer") + + +def find_proc_by_mem_obj(object): + return find_proc_by_pattern(object, MEMORY_PATTERN, "a Memory") + + +def find_proc_by_modules_obj(object): + return find_proc_by_pattern(object, MODULES_PATTERN, "a ModuleContainer") + + +def find_thread_by_num(id): + if id != util.selected_thread(): + util.select_thread(id) + return util.selected_thread() + + +def find_thread_by_pattern(pattern, object, err_msg): + mat = pattern.fullmatch(object.path) + if mat is None: + raise TypeError(f"{object} is not {err_msg}") + pnum = int(mat['procnum']) + tnum = int(mat['tnum']) + find_proc_by_num(pnum) + return find_thread_by_num(tnum) + + +def find_thread_by_obj(object): + return find_thread_by_pattern(THREAD_PATTERN, object, "a Thread") + + +def find_thread_by_stack_obj(object): + return find_thread_by_pattern(STACK_PATTERN, object, "a Stack") + + +def find_thread_by_regs_obj(object): + return find_thread_by_pattern(REGS_PATTERN, object, "a RegisterValueContainer") + + +def find_frame_by_level(level): + tnum = util.selected_thread() + thread = commands.prog.thread(tnum) + try: + frames = thread.stack_trace() + except Exception as e: + print(e) + return + + 
for i,f in enumerate(frames): + if i == level: + if i != util.selected_frame(): + util.select_frame(i) + return i,f + + +def find_frame_by_pattern(pattern, object, err_msg): + mat = pattern.fullmatch(object.path) + if mat is None: + raise TypeError(f"{object} is not {err_msg}") + pnum = int(mat['procnum']) + tnum = int(mat['tnum']) + level = int(mat['level']) + find_proc_by_num(pnum) + find_thread_by_num(tnum) + return find_frame_by_level(level) + + +def find_frame_by_obj(object): + return find_frame_by_pattern(FRAME_PATTERN, object, "a StackFrame") + + +def find_frame_by_regs_obj(object): + return find_frame_by_pattern(REGS_PATTERN, object, "a RegisterValueContainer") + + +def find_frame_by_locals_obj(object): + return find_frame_by_pattern(LOCALS_PATTERN, object, "a LocalsContainer") + + +def find_module_by_base(modbase): + for m in commands.prog.modules(): + if modbase == str(hex(m.address_range[0])): + return m + + +def find_module_by_pattern(pattern, object, err_msg): + mat = pattern.fullmatch(object.path) + if mat is None: + raise TypeError(f"{object} is not {err_msg}") + pnum = int(mat['procnum']) + modbase = mat['modbase'] + find_proc_by_num(pnum) + return find_module_by_base(modbase) + + +def find_module_by_obj(object): + return find_module_by_pattern(MODULE_PATTERN, object, "a Module") + + +shared_globals = dict() + + +@REGISTRY.method +def execute(cmd: str, to_string: bool=False): + """Execute a Python3 command or script.""" + if to_string: + data = StringIO() + with redirect_stdout(data): + exec(cmd, shared_globals) + return data.getvalue() + else: + exec(cmd, shared_globals) + + +@REGISTRY.method(action='refresh', display='Refresh Processes') +def refresh_processes(node: sch.Schema('ProcessContainer')): + """Refresh the list of processes.""" + with commands.open_tracked_tx('Refresh Processes'): + commands.ghidra_trace_put_processes() + + +@REGISTRY.method(action='refresh', display='Refresh Environment') +def refresh_environment(node: 
sch.Schema('Environment')): + """Refresh the environment descriptors (arch, os, endian).""" + with commands.open_tracked_tx('Refresh Environment'): + commands.ghidra_trace_put_environment() + + +@REGISTRY.method(action='refresh', display='Refresh Threads') +def refresh_threads(node: sch.Schema('ThreadContainer')): + """Refresh the list of threads in the process.""" + with commands.open_tracked_tx('Refresh Threads'): + commands.ghidra_trace_put_threads() + + +# @REGISTRY.method(action='refresh', display='Refresh Symbols') +# def refresh_symbols(node: sch.Schema('SymbolContainer')): +# """Refresh the list of symbols in the process.""" +# with commands.open_tracked_tx('Refresh Symbols'): +# commands.ghidra_trace_put_symbols() + + +@REGISTRY.method(action='show_symbol', display='Retrieve Symbols') +def retrieve_symbols( + session: sch.Schema('SymbolContainer'), + pattern: ParamDesc(str, display='Pattern')): + """ + Load the symbol set matching the pattern. + """ + with commands.open_tracked_tx('Retrieve Symbols'): + commands.put_symbols(pattern) + + +@REGISTRY.method(action='refresh', display='Refresh Stack') +def refresh_stack(node: sch.Schema('Stack')): + """Refresh the backtrace for the thread.""" + tnum = find_thread_by_stack_obj(node) + with commands.open_tracked_tx('Refresh Stack'): + commands.ghidra_trace_put_frames() + + +@REGISTRY.method(action='refresh', display='Refresh Registers') +def refresh_registers(node: sch.Schema('RegisterValueContainer')): + """Refresh the register values for the selected frame""" + level = find_frame_by_regs_obj(node) + with commands.open_tracked_tx('Refresh Registers'): + commands.ghidra_trace_putreg() + + +@REGISTRY.method(action='refresh', display='Refresh Locals') +def refresh_locals(node: sch.Schema('LocalsContainer')): + """Refresh the local values for the selected frame""" + level = find_frame_by_locals_obj(node) + with commands.open_tracked_tx('Refresh Registers'): + commands.ghidra_trace_put_locals() + + 
+@REGISTRY.method(action='refresh', display='Refresh Memory') +def refresh_mappings(node: sch.Schema('Memory')): + """Refresh the list of memory regions for the process.""" + with commands.open_tracked_tx('Refresh Memory Regions'): + commands.ghidra_trace_put_regions() + + +@REGISTRY.method(action='refresh', display='Refresh Modules') +def refresh_modules(node: sch.Schema('ModuleContainer')): + """ + Refresh the modules list for the process. + """ + with commands.open_tracked_tx('Refresh Modules'): + commands.ghidra_trace_put_modules() + + +@REGISTRY.method(action='activate') +def activate_process(process: sch.Schema('Process')): + """Switch to the process.""" + find_proc_by_obj(process) + + +@REGISTRY.method(action='activate') +def activate_thread(thread: sch.Schema('Thread')): + """Switch to the thread.""" + find_thread_by_obj(thread) + + +@REGISTRY.method(action='activate') +def activate_frame(frame: sch.Schema('StackFrame')): + """Select the frame.""" + i,f = find_frame_by_obj(frame) + util.select_frame(i) + with commands.open_tracked_tx('Refresh Stack'): + commands.ghidra_trace_put_frames() + with commands.open_tracked_tx('Refresh Registers'): + commands.ghidra_trace_putreg() + + +@REGISTRY.method +def read_mem(process: sch.Schema('Process'), range: AddressRange): + """Read memory.""" + # print("READ_MEM: process={}, range={}".format(process, range)) + nproc = find_proc_by_obj(process) + offset_start = process.trace.memory_mapper.map_back( + nproc, Address(range.space, range.min)) + with commands.open_tracked_tx('Read Memory'): + result = commands.put_bytes( + offset_start, offset_start + range.length() - 1, pages=True, display_result=False) + if result['count'] == 0: + commands.putmem_state( + offset_start, offset_start+range.length() - 1, 'error') + + +@REGISTRY.method(action='attach', display='Attach by pid') +def attach_pid( + processes: sch.Schema('ProcessContainer'), + pid: ParamDesc(str, display='PID')): + """Attach the process to the given target.""" + 
prog = drgn.Program() + prog.set_pid(int(pid)) + util.selected_pid = int(pid) + util.selected_tid = prog.main_thread().tid + default_symbols = {"default": True, "main": True} + try: + prog.load_debug_info(None, **default_symbols) + except drgn.MissingDebugInfoError as e: + print(e) + #commands.ghidra_trace_start(pid) + commands.PROGRAMS[pid] = prog + commands.prog = prog + with commands.open_tracked_tx('Refresh Processes'): + commands.ghidra_trace_put_processes() + + +@REGISTRY.method(action='attach', display='Attach core dump') +def attach_core( + processes: sch.Schema('ProcessContainer'), + core: ParamDesc(str, display='Core dump')): + """Attach the process to the given target.""" + prog = drgn.Program() + prog.set_core_dump(core) + default_symbols = {"default": True, "main": True} + try: + prog.load_debug_info(None, **default_symbols) + except drgn.MissingDebugInfoError as e: + print(e) + + util.selected_pid += 1 + commands.PROGRAMS[util.selected_pid] = prog + commands.prog = prog + with commands.open_tracked_tx('Refresh Processes'): + commands.ghidra_trace_put_processes() + + +@REGISTRY.method(action='step_into') +def step_into(thread: sch.Schema('Thread'), n: ParamDesc(int, display='N')=1): + """Step one instruction exactly.""" + find_thread_by_obj(thread) + time.sleep(1) + hooks.on_stop(None) + + +# @REGISTRY.method +# def kill(process: sch.Schema('Process')): +# """Kill execution of the process.""" +# commands.ghidra_trace_kill() + + +# @REGISTRY.method(action='resume') +# def go(process: sch.Schema('Process')): +# """Continue execution of the process.""" +# util.dbg.run_async(lambda: dbg().go()) + + +# @REGISTRY.method +# def interrupt(process: sch.Schema('Process')): +# """Interrupt the execution of the debugged program.""" +# # SetInterrupt is reentrant, so bypass the thread checks +# util.dbg._protected_base._control.SetInterrupt( +# DbgEng.DEBUG_INTERRUPT_ACTIVE) + + diff --git a/Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/schema.xml 
b/Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/schema.xml new file mode 100644 index 00000000000..28c5bd61912 --- /dev/null +++ b/Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/schema.xml @@ -0,0 +1,183 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/util.py b/Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/util.py new file mode 100644 index 00000000000..2dcdce204e0 --- /dev/null +++ b/Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/util.py @@ -0,0 +1,115 @@ +## ### +# IP: GHIDRA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+## +from collections import namedtuple +import os +import re +import sys + +import drgn +import drgn.cli + + +DrgnVersion = namedtuple('DrgnVersion', ['display', 'full']) + +selected_pid = 0 +selected_tid = 0 +selected_level = 0 + + +def _compute_drgn_ver(): + blurb = drgn.cli.version_header() + top = blurb.split('\n')[0] + full = top.split()[1] # "drgn x.y.z" + return DrgnVersion(top, full) + + +DRGN_VERSION = _compute_drgn_ver() + +def full_mem(self): + return Region(0, 1 << 64, 0, None, 'full memory') + + +def get_debugger(): + return drgn + + +def get_target(): + return commands.prog + + +def get_process(name): + return get_target()[name] + + +def selected_process(): + return selected_pid + + +def selected_thread(): + return selected_tid + + +def selected_frame(): + return selected_level + + +def select_process(id: int): + global selected_pid + selected_pid = id + return selected_pid + + +def select_thread(id: int): + global selected_tid + selected_tid = id + return selected_tid + + +def select_frame(id: int): + global selected_level + selected_level = id + return selected_level + + +conv_map = {} + + +def get_convenience_variable(id): + #val = get_target().GetEnvironment().Get(id) + if id not in conv_map: + return "auto" + val = conv_map[id] + if val is None: + return "auto" + return val + + +def set_convenience_variable(id, value): + #env = get_target().GetEnvironment() + # return env.Set(id, value, True) + conv_map[id] = value + + +def escape_ansi(line): + ansi_escape = re.compile(r'(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]') + return ansi_escape.sub('', line) + + +def debracket(init): + val = init + val = val.replace("[", "(") + val = val.replace("]", ")") + return val diff --git a/Ghidra/Debug/Debugger-rmi-trace/src/main/help/help/topics/TraceRmiLauncherServicePlugin/TraceRmiLauncherServicePlugin.html b/Ghidra/Debug/Debugger-rmi-trace/src/main/help/help/topics/TraceRmiLauncherServicePlugin/TraceRmiLauncherServicePlugin.html index 2d53e46cfdb..4cbe6a17aeb 100644 --- 
a/Ghidra/Debug/Debugger-rmi-trace/src/main/help/help/topics/TraceRmiLauncherServicePlugin/TraceRmiLauncherServicePlugin.html +++ b/Ghidra/Debug/Debugger-rmi-trace/src/main/help/help/topics/TraceRmiLauncherServicePlugin/TraceRmiLauncherServicePlugin.html @@ -1043,6 +1043,74 @@

Options

execution. +

Drgn Launchers

+ +

The following launchers use Meta's drgn engine to explore various targets:

+ +

drgn

+ +

This launcher attaches to a running process via the Linux "/proc/pid" interface.

+ +

Setup

+ +

You must have Meta's drgn installed on the local system. The default behavior + assumes you do NOT need root access to attach to a running process, i.e. it assumes you + have run the command:

+ +
    +
  • +
    +echo 0 > /proc/sys/kernel/yama/ptrace_scope
    +
    +
  • +
+ +

using root privileges at some point. Alternatively, you can prepend "sudo -E" + to the drgn invocation line in "local-drgn.sh". Note: drgn does not currently + support stack unwinding or register access for user-mode access to running processes. +

+ +

Options

+ +
    +
  • PID: The running process's id
  • + +
+ +

drgn-core

+ +

This launcher loads a Linux core dump.

+ +

Setup

+ +

You must have Meta's drgn installed on the local system. No other setup is required. + Note: Core dumps may or may not include memory, so the Dynamic Listing may or may not be populated. +

+ +

Options

+ +
    +
  • Core dump: The core-dump file
  • + +
+ +

drgn-kernel

+ +

This launcher attaches to a Linux kernel via the "/proc/kcore" interface.

+ +

Setup

+ +

You must have Meta's drgn installed on the local system. No other setup is required. + Note: requires root access - you will be prompted for a password in the Terminal. +

+ +

Options

+ +
    +
  • None
  • + +
+

Development and Diagnostic Launchers

We currently provide one launcher for Trace RMI API exploration and development:

diff --git a/Ghidra/Test/DebuggerIntegrationTest/src/test.slow/java/agent/drgn/rmi/AbstractDrgnTraceRmiTest.java b/Ghidra/Test/DebuggerIntegrationTest/src/test.slow/java/agent/drgn/rmi/AbstractDrgnTraceRmiTest.java new file mode 100644 index 00000000000..befdaee92d7 --- /dev/null +++ b/Ghidra/Test/DebuggerIntegrationTest/src/test.slow/java/agent/drgn/rmi/AbstractDrgnTraceRmiTest.java @@ -0,0 +1,379 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package agent.drgn.rmi; + +import static org.junit.Assert.*; +import static org.junit.Assume.*; + +import java.io.FileWriter; +import java.io.IOException; +import java.net.*; +import java.nio.file.*; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.*; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.*; + +import org.apache.commons.lang3.exception.ExceptionUtils; +import org.junit.Before; + +import generic.jar.ResourceFile; +import ghidra.app.plugin.core.debug.gui.AbstractGhidraHeadedDebuggerTest; +import ghidra.app.plugin.core.debug.service.tracermi.TraceRmiPlugin; +import ghidra.app.plugin.core.debug.utils.ManagedDomainObject; +import ghidra.app.services.TraceRmiService; +import ghidra.debug.api.tracermi.*; +import ghidra.framework.*; +import ghidra.framework.main.ApplicationLevelOnlyPlugin; +import ghidra.framework.model.DomainFile; +import ghidra.framework.plugintool.Plugin; +import 
ghidra.framework.plugintool.PluginsConfiguration; +import ghidra.framework.plugintool.util.*; +import ghidra.pty.testutil.DummyProc; +import ghidra.util.Msg; +import junit.framework.AssertionFailedError; + +public abstract class AbstractDrgnTraceRmiTest extends AbstractGhidraHeadedDebuggerTest { + + protected static String CORE = "core.12137"; + protected static String MDO = "/New Traces/" + CORE; + public static String PREAMBLE = """ + import os + import drgn + import drgn.cli + os.environ['OPT_TARGET_KIND'] = 'coredump' + os.environ['OPT_TARGET_IMG'] = '$CORE' + from ghidradrgn.commands import * + """; + + // Connecting should be the first thing the script does, so use a tight timeout. + protected static final int CONNECT_TIMEOUT_MS = 3000; + protected static final int TIMEOUT_SECONDS = 30000; + protected static final int QUIT_TIMEOUT_MS = 1000; + + protected static boolean didSetupPython = false; + + protected TraceRmiService traceRmi; + private Path pythonPath; + private Path outFile; + private Path errFile; + + @Before + public void assertOS() { + assumeTrue(OperatingSystem.CURRENT_OPERATING_SYSTEM == OperatingSystem.LINUX); + } + + //@BeforeClass + public static void setupPython() throws Throwable { + if (didSetupPython) { + // Only do this once when running the full suite. + return; + } + String gradle = DummyProc.which("gradle"); + new ProcessBuilder(gradle, "Debugger-agent-drgn:assemblePyPackage") + .directory(TestApplicationUtils.getInstallationDirectory()) + .inheritIO() + .start() + .waitFor(); + didSetupPython = true; + } + + protected void setPythonPath(ProcessBuilder pb) throws IOException { + String sep = // PYTHONPATH entries are ';'-separated on Windows and ':'-separated elsewhere + OperatingSystem.CURRENT_OPERATING_SYSTEM == OperatingSystem.WINDOWS ? 
";" : ":"; + String rmiPyPkg = Application.getModuleSubDirectory("Debugger-rmi-trace", + "build/pypkg/src").getAbsolutePath(); + String drgnPyPkg = Application.getModuleSubDirectory("Debugger-agent-drgn", + "build/pypkg/src").getAbsolutePath(); + String add = rmiPyPkg + sep + drgnPyPkg; + pb.environment().compute("PYTHONPATH", (k, v) -> v == null ? add : (v + sep + add)); + } + + @Before + public void setupTraceRmi() throws Throwable { + traceRmi = addPlugin(tool, TraceRmiPlugin.class); + + try { + pythonPath = Paths.get(DummyProc.which("drgn")); + } + catch (RuntimeException e) { + Msg.error(this, e); + } + outFile = Files.createTempFile("drgnout", null); + errFile = Files.createTempFile("drgnerr", null); + } + + protected void addAllDebuggerPlugins() throws PluginException { + PluginsConfiguration plugConf = new PluginsConfiguration() { + @Override + protected boolean accepts(Class pluginClass) { + return !ApplicationLevelOnlyPlugin.class.isAssignableFrom(pluginClass); + } + }; + + for (PluginDescription pd : plugConf + .getPluginDescriptions(PluginPackage.getPluginPackage("Debugger"))) { + addPlugin(tool, pd.getPluginClass()); + } + } + + protected static String addrToStringForPython(InetAddress address) { + if (address.isAnyLocalAddress()) { + return "127.0.0.1"; // Can't connect to 0.0.0.0 as such. Choose localhost. 
+ } + return address.getHostAddress(); + } + + protected static String sockToStringForPython(SocketAddress address) { + if (address instanceof InetSocketAddress tcp) { + return addrToStringForPython(tcp.getAddress()) + ":" + tcp.getPort(); + } + throw new AssertionError("Unhandled address type " + address); + } + + protected record PythonResult(boolean timedOut, int exitCode, String stdout, String stderr) { + protected String handle() { + if (stderr.contains("RuntimeError") || stderr.contains(" Error") || (0 != exitCode && 1 != exitCode && 143 != exitCode)) { + throw new PythonError(exitCode, stdout, stderr); + } + System.out.println("--stdout--"); + System.out.println(stdout); + System.out.println("--stderr--"); + System.out.println(stderr); + return stdout; + } + } + + protected record ExecInDrgn(Process python, CompletableFuture future) { + } + + @SuppressWarnings("resource") // Do not close stdin + protected ExecInDrgn execInDrgn(String script) throws IOException { + ResourceFile rf = Application.getModuleDataFile("TestResources", CORE); + script = script.replace("$CORE", rf.getAbsolutePath()); + Path fp = Files.createTempFile("test", ".py"); + FileWriter fw = new FileWriter(fp.toFile()); + fw.write(script); + fw.close(); + ProcessBuilder pb = new ProcessBuilder(pythonPath.toString(), "-c", + rf.getAbsolutePath(), fp.toFile().getAbsolutePath()); + setPythonPath(pb); + + // If commands come from file, Python will quit after EOF. 
+ Msg.info(this, "outFile: " + outFile); + Msg.info(this, "errFile: " + errFile); + + //pb.inheritIO(); + pb.redirectInput(ProcessBuilder.Redirect.PIPE); + pb.redirectOutput(outFile.toFile()); + pb.redirectError(errFile.toFile()); + Process pyproc = pb.start(); + return new ExecInDrgn(pyproc, CompletableFuture.supplyAsync(() -> { + try { + if (!pyproc.waitFor(TIMEOUT_SECONDS, TimeUnit.SECONDS)) { + Msg.error(this, "Timed out waiting for Python"); + pyproc.destroyForcibly(); + pyproc.waitFor(TIMEOUT_SECONDS, TimeUnit.SECONDS); + return new PythonResult(true, -1, Files.readString(outFile), + Files.readString(errFile)); + } + Msg.info(this, "Python exited with code " + pyproc.exitValue()); + return new PythonResult(false, pyproc.exitValue(), Files.readString(outFile), + Files.readString(errFile)); + } + catch (Exception e) { + return ExceptionUtils.rethrow(e); + } + finally { + pyproc.destroyForcibly(); + } + })); + } + + public static class PythonError extends RuntimeException { + public final int exitCode; + public final String stdout; + public final String stderr; + + public PythonError(int exitCode, String stdout, String stderr) { + super(""" + exitCode=%d: + ----stdout---- + %s + ----stderr---- + %s + """.formatted(exitCode, stdout, stderr)); + this.exitCode = exitCode; + this.stdout = stdout; + this.stderr = stderr; + } + } + + protected String runThrowError(String script) throws Exception { + CompletableFuture result = execInDrgn(script).future; + return result.get(TIMEOUT_SECONDS, TimeUnit.SECONDS).handle(); + } + + protected record PythonAndConnection(ExecInDrgn exec, TraceRmiConnection connection) + implements AutoCloseable { + protected RemoteMethod getMethod(String name) { + return Objects.requireNonNull(connection.getMethods().get(name)); + } + + public void execute(String cmd) { + RemoteMethod execute = getMethod("execute"); + execute.invoke(Map.of("cmd", cmd)); + } + + public RemoteAsyncResult executeAsync(String cmd) { + RemoteMethod execute = 
getMethod("execute"); + return execute.invokeAsync(Map.of("cmd", cmd)); + } + + public String executeCapture(String cmd) { + RemoteMethod execute = getMethod("execute"); + return (String) execute.invoke(Map.of("cmd", cmd, "to_string", true)); + } + + @Override + public void close() throws Exception { + Msg.info(this, "Cleaning up python"); + exec.python().destroy(); + try { + PythonResult r = exec.future.get(TIMEOUT_SECONDS, TimeUnit.SECONDS); + r.handle(); + waitForPass(() -> assertTrue(connection.isClosed())); + } + finally { + exec.python.destroyForcibly(); + } + } + } + + protected PythonAndConnection startAndConnectDrgn(Function scriptSupplier) + throws Exception { + TraceRmiAcceptor acceptor = traceRmi.acceptOne(null); + ExecInDrgn exec = + execInDrgn(scriptSupplier.apply(sockToStringForPython(acceptor.getAddress()))); + acceptor.setTimeout(CONNECT_TIMEOUT_MS); + try { + TraceRmiConnection connection = acceptor.accept(); + return new PythonAndConnection(exec, connection); + } + catch (SocketTimeoutException e) { + exec.python.destroyForcibly(); + exec.future.get(TIMEOUT_SECONDS, TimeUnit.SECONDS).handle(); + throw e; + } + } + + protected PythonAndConnection startAndConnectDrgn() throws Exception { + return startAndConnectDrgn(addr -> """ + %s + ghidra_trace_connect('%s') + drgn.cli.run_interactive(prog) + """.formatted(PREAMBLE, addr)); + } + + @SuppressWarnings("resource") + protected String runThrowError(Function scriptSupplier) + throws Exception { + PythonAndConnection conn = startAndConnectDrgn(scriptSupplier); + PythonResult r = conn.exec.future.get(TIMEOUT_SECONDS, TimeUnit.SECONDS); + String stdout = r.handle(); + //waitForPass(() -> assertTrue(conn.connection.isClosed())); + return stdout; + } + + protected String extractOutSection(String out, String head) { + String[] split = out.split("\n"); + String xout = ""; + for (String s : split) { + if (!s.startsWith("(python)") && !s.equals("")) { + xout += s + "\n"; + } + } + return 
xout.split(head)[1].split("---")[0].replace("(python)", "").trim(); + } + + protected ManagedDomainObject openDomainObject(String path) throws Exception { + DomainFile df = env.getProject().getProjectData().getFile(path); + assertNotNull(df); + return new ManagedDomainObject(df, false, false, monitor); + } + + protected ManagedDomainObject waitDomainObject(String path) throws Exception { + DomainFile df; + long start = System.currentTimeMillis(); + while (true) { + df = env.getProject().getProjectData().getFile(path); + if (df != null) { + return new ManagedDomainObject(df, false, false, monitor); + } + Thread.sleep(1000); + if (System.currentTimeMillis() - start > 30000) { + throw new TimeoutException("30 seconds expired waiting for domain file"); + } + } + } + + protected long getMaxSnap() { + Long maxSnap = tb.trace.getTimeManager().getMaxSnap(); + return maxSnap == null ? 0 : maxSnap; + } + + protected void waitTxDone() { + waitFor(() -> tb.trace.getCurrentTransactionInfo() == null); + } + + public static void waitForPass(Runnable runnable) { + AtomicReference lastError = new AtomicReference<>(); + waitForCondition(() -> { + try { + runnable.run(); + return true; + } + catch (AssertionError e) { + lastError.set(e); + return false; + } + }, () -> lastError.get().getMessage()); + } + + public static void waitForCondition(BooleanSupplier condition, + Supplier failureMessageSupplier) throws AssertionFailedError { + + int totalTime = 0; + while (totalTime <= DEFAULT_WAIT_TIMEOUT * 10) { + + if (condition.getAsBoolean()) { + return; // success + } + + totalTime += sleep(DEFAULT_WAIT_DELAY * 10); + } + + String failureMessage = "Timed-out waiting for condition"; + if (failureMessageSupplier != null) { + failureMessage = failureMessageSupplier.get(); + } + + throw new AssertionFailedError(failureMessage); + } + + +} diff --git a/Ghidra/Test/DebuggerIntegrationTest/src/test.slow/java/agent/drgn/rmi/DrgnCommandsTest.java 
b/Ghidra/Test/DebuggerIntegrationTest/src/test.slow/java/agent/drgn/rmi/DrgnCommandsTest.java
new file mode 100644
index 00000000000..69b77f08a25
--- /dev/null
+++ b/Ghidra/Test/DebuggerIntegrationTest/src/test.slow/java/agent/drgn/rmi/DrgnCommandsTest.java
@@ -0,0 +1,909 @@
+/* ###
+ * IP: GHIDRA
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package agent.drgn.rmi;
+
+import static org.hamcrest.Matchers.*;
+import static org.junit.Assert.*;
+
+import java.util.*;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
+
+import org.junit.Test;
+
+import db.Transaction;
+import generic.Unique;
+import ghidra.app.plugin.core.debug.utils.ManagedDomainObject;
+import ghidra.debug.api.tracermi.TraceRmiAcceptor;
+import ghidra.debug.api.tracermi.TraceRmiConnection;
+import ghidra.framework.Application;
+import ghidra.framework.model.DomainFile;
+import ghidra.program.model.address.Address;
+import ghidra.program.model.address.AddressSpace;
+import ghidra.program.model.data.Float10DataType;
+import ghidra.program.model.lang.RegisterValue;
+import ghidra.program.model.listing.CodeUnit;
+import ghidra.trace.database.ToyDBTraceBuilder;
+import ghidra.trace.model.Lifespan;
+import ghidra.trace.model.Trace;
+import ghidra.trace.model.listing.TraceCodeSpace;
+import ghidra.trace.model.memory.TraceMemoryRegion;
+import ghidra.trace.model.memory.TraceMemorySpace;
+import ghidra.trace.model.modules.TraceModule;
+import ghidra.trace.model.target.TraceObject;
+import ghidra.trace.model.target.TraceObjectValue;
+import ghidra.trace.model.target.path.KeyPath;
+import ghidra.trace.model.target.path.PathFilter;
+import ghidra.trace.model.thread.TraceThread;
+import ghidra.trace.model.time.TraceSnapshot;
+import ghidra.util.Msg;
+
+/**
+ * Integration tests for the {@code ghidra_trace_*} command functions of the drgn agent.
+ *
+ * <p>
+ * Most tests hand a short Python script to {@code runThrowError}, wait for it to finish, and
+ * then inspect either the captured standard output or the trace that was saved into the test
+ * project. {@code PREAMBLE}, {@code MDO}, {@code tb}, {@code env}, {@code monitor},
+ * {@code traceRmi} and {@code traceManager} are inherited from
+ * {@link AbstractDrgnTraceRmiTest}.
+ */
+public class DrgnCommandsTest extends AbstractDrgnTraceRmiTest {
+
+    /**
+     * Manual aid, disabled by default: accept one connection and block until it is closed.
+     *
+     * <p>
+     * The logged hint quotes the address so it can be pasted as a Python call.
+     */
+    //@Test
+    public void testManual() throws Exception {
+        TraceRmiAcceptor acceptor = traceRmi.acceptOne(null);
+        Msg.info(this,
+            "Use: ghidra_trace_connect('" + sockToStringForPython(acceptor.getAddress()) + "')");
+        TraceRmiConnection connection = acceptor.accept();
+        Msg.info(this, "Connected: " + sockToStringForPython(connection.getRemoteAddress()));
+        connection.waitClosed();
+        Msg.info(this, "Closed");
+    }
+
+    /** Calling connect without an address must fail and name the missing argument on stderr. */
+    @Test
+    public void testConnectErrorNoArg() throws Exception {
+        try {
+            runThrowError("""
+                    from ghidradrgn.commands import *
+                    ghidra_trace_connect()
+                    quit()
+                    """);
+            fail();
+        }
+        catch (PythonError e) {
+            assertThat(e.stderr, containsString("'ghidra_trace_connect'"));
+            assertThat(e.stderr, containsString("'address'"));
+        }
+    }
+
+    /** Connecting and quitting must complete without a Python error. */
+    @Test
+    public void testConnect() throws Exception {
+        runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                quit()
+                """.formatted(PREAMBLE, addr));
+    }
+
+    /** Connecting followed by an explicit disconnect must complete without a Python error. */
+    @Test
+    public void testDisconnect() throws Exception {
+        runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_disconnect()
+                quit()
+                """.formatted(PREAMBLE, addr));
+    }
+
+    /** A trace created with defaults is expected to use x86:LE:64:default with gcc. */
+    @Test
+    public void testStartTraceDefaults() throws Exception {
+        // Default name and lcsp
+        runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_create()
+                quit()
+                """.formatted(PREAMBLE, addr));
+        try (ManagedDomainObject mdo = openDomainObject(MDO)) {
+            tb = new ToyDBTraceBuilder((Trace) mdo.get());
+            assertEquals("x86:LE:64:default",
+                tb.trace.getBaseLanguage().getLanguageID().getIdAsString());
+            assertEquals("gcc",
+                tb.trace.getBaseCompilerSpec().getCompilerSpecID().getIdAsString());
+        }
+    }
+
+    /** Starting a trace without a name is expected to save it as "drgn/noname". */
+    @Test
+    public void testStartTraceDefaultNoFile() throws Exception {
+        runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_start()
+                quit()
+                """.formatted(PREAMBLE, addr));
+        try (ManagedDomainObject mdo = openDomainObject("/New Traces/drgn/noname")) {
+            assertThat(mdo.get(), instanceOf(Trace.class));
+        }
+    }
+
+    /** Language, compiler and trace name set from the script must be reflected in the trace. */
+    @Test
+    public void testStartTraceCustomize() throws Exception {
+        runThrowError(
+            addr -> """
+                    %s
+                    ghidra_trace_connect('%s')
+                    ghidra_trace_create(start_trace=False)
+                    util.set_convenience_variable('ghidra-language','Toy:BE:64:default')
+                    util.set_convenience_variable('ghidra-compiler','default')
+                    ghidra_trace_start('myToy')
+                    quit()
+                    """
+                    .formatted(PREAMBLE, addr));
+        DomainFile df = env.getProject().getProjectData().getFile("/New Traces/myToy");
+        assertNotNull(df);
+        try (ManagedDomainObject mdo = new ManagedDomainObject(df, false, false, monitor)) {
+            tb = new ToyDBTraceBuilder((Trace) mdo.get());
+            assertEquals("Toy:BE:64:default",
+                tb.trace.getBaseLanguage().getLanguageID().getIdAsString());
+            assertEquals("default",
+                tb.trace.getBaseCompilerSpec().getCompilerSpecID().getIdAsString());
+        }
+    }
+
+    /** After create followed by stop, the trace file must still exist in the project. */
+    @Test
+    public void testStopTrace() throws Exception {
+        runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_create()
+                ghidra_trace_stop()
+                quit()
+                """.formatted(PREAMBLE, addr));
+        DomainFile df =
+            env.getProject().getProjectData().getFile(MDO);
+        assertNotNull(df);
+    }
+
+    /**
+     * Check the text printed by {@code ghidra_trace_info()} at each stage of a session: before
+     * connecting, after connecting, with an active trace, after stopping, after disconnecting.
+     */
+    @Test
+    public void testInfo() throws Exception {
+        // Captures the address handed to the script so the expected text can mention it
+        AtomicReference<String> refAddr = new AtomicReference<>();
+        String out = runThrowError(addr -> {
+            refAddr.set(addr);
+            return """
+                    %s
+                    print('---Import---')
+                    ghidra_trace_info()
+                    print('---BeforeConnect---')
+                    ghidra_trace_connect('%s')
+                    print('---Connect---')
+                    ghidra_trace_info()
+                    print('---Create---')
+                    ghidra_trace_create()
+                    print('---Start---')
+                    ghidra_trace_info()
+                    ghidra_trace_stop()
+                    print('---Stop---')
+                    ghidra_trace_info()
+                    ghidra_trace_disconnect()
+                    print('---Disconnect---')
+                    ghidra_trace_info()
+                    quit()
+                    """.formatted(PREAMBLE, addr);
+        });
+
+        assertEquals("""
+                Not connected to Ghidra""",
+            extractOutSection(out, "---Import---"));
+        assertEquals("""
+                Connected to %s %s at %s
+                No trace""".formatted(
+            Application.getName(), Application.getApplicationVersion(), refAddr.get()),
+            extractOutSection(out, "---Connect---").replaceAll("\r", ""));
+        assertEquals("""
+                Connected to %s %s at %s
+                Trace active""".formatted(
+            Application.getName(), Application.getApplicationVersion(), refAddr.get()),
+            extractOutSection(out, "---Start---").replaceAll("\r", ""));
+        assertEquals("""
+                Connected to %s %s at %s
+                No trace""".formatted(
+            Application.getName(), Application.getApplicationVersion(), refAddr.get()),
+            extractOutSection(out, "---Stop---").replaceAll("\r", ""));
+        assertEquals("""
+                Not connected to Ghidra""",
+            extractOutSection(out, "---Disconnect---"));
+    }
+
+    /**
+     * Check the text printed by {@code ghidra_trace_info_lcsp()} for the default selection and
+     * after overriding the language and then the compiler.
+     */
+    @Test
+    public void testLcsp() throws Exception {
+        String out = runThrowError(addr ->
+            """
+                    %s
+                    ghidra_trace_connect('%s')
+                    print('---Import---')
+                    ghidra_trace_info_lcsp()
+                    print('---Create---')
+                    ghidra_trace_create()
+                    print('---File---')
+                    ghidra_trace_info_lcsp()
+                    util.set_convenience_variable('ghidra-language','DATA:BE:64:default')
+                    print('---Language---')
+                    ghidra_trace_info_lcsp()
+                    util.set_convenience_variable('ghidra-compiler','posStack')
+                    print('---Compiler---')
+                    ghidra_trace_info_lcsp()
+                    quit()
+                    """.formatted(PREAMBLE, addr));
+
+        assertEquals("""
+                Selected Ghidra language: x86:LE:64:default
+                Selected Ghidra compiler: gcc""",
+            extractOutSection(out, "---File---").replaceAll("\r", ""));
+        assertEquals("""
+                Using the DATA64 compiler map
+                Selected Ghidra language: DATA:BE:64:default
+                Selected Ghidra compiler: pointer64""",
+            extractOutSection(out, "---Language---").replaceAll("\r", ""));
+        assertEquals("""
+                Selected Ghidra language: DATA:BE:64:default
+                Selected Ghidra compiler: posStack""",
+            extractOutSection(out, "---Compiler---").replaceAll("\r", ""));
+    }
+
+    /** A scripted snapshot must be the only snapshot, with key 0 and the given description. */
+    @Test
+    public void testSnapshot() throws Exception {
+        runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_create()
+                ghidra_trace_txstart('Create snapshot')
+                ghidra_trace_new_snap('Scripted snapshot')
+                ghidra_trace_txcommit()
+                quit()
+                """.formatted(PREAMBLE, addr));
+        try (ManagedDomainObject mdo = openDomainObject(MDO)) {
+            tb = new ToyDBTraceBuilder((Trace) mdo.get());
+            TraceSnapshot snapshot = Unique.assertOne(tb.trace.getTimeManager().getAllSnapshots());
+            assertEquals(0, snapshot.getKey());
+            assertEquals("Scripted snapshot", snapshot.getDescription());
+        }
+    }
+
+    /**
+     * After {@code ghidra_trace_putreg()}, rip must hold the fixture's expected value and st0
+     * must accept a Float10 data unit.
+     */
+    @Test
+    public void testPutreg() throws Exception {
+        runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_create()
+                ghidra_trace_txstart('Create snapshot')
+                ghidra_trace_new_snap('Scripted snapshot')
+                ghidra_trace_putreg()
+                ghidra_trace_txcommit()
+                quit()
+                """.formatted(PREAMBLE, addr));
+        try (ManagedDomainObject mdo = openDomainObject(MDO)) {
+            tb = new ToyDBTraceBuilder((Trace) mdo.get());
+            long snap = Unique.assertOne(tb.trace.getTimeManager().getAllSnapshots()).getKey();
+            List<TraceObjectValue> regVals = tb.trace.getObjectManager()
+                    .getValuePaths(Lifespan.at(0),
+                        PathFilter.parse("Processes[].Threads[].Stack[].Registers"))
+                    .map(p -> p.getLastEntry())
+                    .toList();
+            // The register address space is looked up by the canonical path of the first match
+            TraceObjectValue tobj = regVals.get(0);
+            AddressSpace t1f0 = tb.trace.getBaseAddressFactory()
+                    .getAddressSpace(tobj.getCanonicalPath().toString());
+            TraceMemorySpace regs = tb.trace.getMemoryManager().getMemorySpace(t1f0, false);
+
+            RegisterValue rip = regs.getValue(snap, tb.reg("rip"));
+            assertEquals("3a40cdf7ff7f0000", rip.getUnsignedValue().toString(16));
+
+            try (Transaction tx = tb.trace.openTransaction("Float80 unit")) {
+                TraceCodeSpace code = tb.trace.getCodeManager().getCodeSpace(t1f0, true);
+                code.definedData()
+                        .create(Lifespan.nowOn(0), tb.reg("st0"), Float10DataType.dataType);
+            }
+        }
+    }
+
+    /** After putreg followed by delreg, rax must read back as zero. */
+    @Test
+    public void testDelreg() throws Exception {
+        runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_create()
+                ghidra_trace_txstart('Create snapshot')
+                ghidra_trace_new_snap('Scripted snapshot')
+                ghidra_trace_putreg()
+                ghidra_trace_delreg()
+                ghidra_trace_txcommit()
+                quit()
+                """.formatted(PREAMBLE, addr));
+        // The spaces will be left over, but the values should be zeroed
+        try (ManagedDomainObject mdo = openDomainObject(MDO)) {
+            tb = new ToyDBTraceBuilder((Trace) mdo.get());
+            long snap = Unique.assertOne(tb.trace.getTimeManager().getAllSnapshots()).getKey();
+            List<TraceObjectValue> regVals = tb.trace.getObjectManager()
+                    .getValuePaths(Lifespan.at(0),
+                        PathFilter.parse("Processes[].Threads[].Stack[].Registers"))
+                    .map(p -> p.getLastEntry())
+                    .toList();
+            TraceObjectValue tobj = regVals.get(0);
+            AddressSpace t1f0 = tb.trace.getBaseAddressFactory()
+                    .getAddressSpace(tobj.getCanonicalPath().toString());
+            TraceMemorySpace regs = tb.trace.getMemoryManager().getMemorySpace(t1f0, false);
+
+            RegisterValue rax = regs.getValue(snap, tb.reg("rax"));
+            assertEquals("0", rax.getUnsignedValue().toString(16));
+        }
+    }
+
+    /** The id printed by {@code ghidra_trace_create_obj} must equal the trace object's key. */
+    @Test
+    public void testCreateObj() throws Exception {
+        String out = runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_start()
+                ghidra_trace_txstart('Create Object')
+                print('---Id---')
+                ghidra_trace_create_obj('Test.Objects[1]')
+                print('---')
+                ghidra_trace_txcommit()
+                quit()
+                """.formatted(PREAMBLE, addr));
+        try (ManagedDomainObject mdo = openDomainObject("/New Traces/drgn/noname")) {
+            tb = new ToyDBTraceBuilder((Trace) mdo.get());
+            TraceObject object = tb.trace.getObjectManager()
+                    .getObjectByCanonicalPath(KeyPath.parse("Test.Objects[1]"));
+            assertNotNull(object);
+            String created = extractOutSection(out, "---Id---");
+            // The printed text is expected to contain "id=<number>,"
+            long id = Long.parseLong(created.split("id=")[1].split(",")[0]);
+            assertEquals(object.getKey(), id);
+        }
+    }
+
+    /** Inserting an object must give it the lifespan [0,+inf) and print that span. */
+    @Test
+    public void testInsertObj() throws Exception {
+        String out = runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_start()
+                ghidra_trace_txstart('Create Object')
+                ghidra_trace_create_obj('Test.Objects[1]')
+                print('---Lifespan---')
+                ghidra_trace_insert_obj('Test.Objects[1]')
+                print('---')
+                ghidra_trace_txcommit()
+                quit()
+                """.formatted(PREAMBLE, addr));
+        try (ManagedDomainObject mdo = openDomainObject("/New Traces/drgn/noname")) {
+            tb = new ToyDBTraceBuilder((Trace) mdo.get());
+            TraceObject object = tb.trace.getObjectManager()
+                    .getObjectByCanonicalPath(KeyPath.parse("Test.Objects[1]"));
+            assertNotNull(object);
+            Lifespan life = Unique.assertOne(object.getLife().spans());
+            assertEquals(Lifespan.nowOn(0), life);
+            assertEquals("Inserted object: lifespan=[0,+inf)",
+                extractOutSection(out, "---Lifespan---"));
+        }
+    }
+
+    /** Removing an object at snap 1 must truncate its life to exactly snap 0. */
+    @Test
+    public void testRemoveObj() throws Exception {
+        runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_create()
+                ghidra_trace_txstart('Create Object')
+                ghidra_trace_create_obj('Test.Objects[1]')
+                ghidra_trace_insert_obj('Test.Objects[1]')
+                ghidra_trace_set_snap(1)
+                ghidra_trace_remove_obj('Test.Objects[1]')
+                ghidra_trace_txcommit()
+                quit()
+                """.formatted(PREAMBLE, addr));
+        try (ManagedDomainObject mdo = openDomainObject(MDO)) {
+            tb = new ToyDBTraceBuilder((Trace) mdo.get());
+            TraceObject object = tb.trace.getObjectManager()
+                    .getObjectByCanonicalPath(KeyPath.parse("Test.Objects[1]"));
+            assertNotNull(object);
+            Lifespan life = Unique.assertOne(object.getLife().spans());
+            assertEquals(Lifespan.at(0), life);
+        }
+    }
+
+    /**
+     * Create {@code Test.Objects[1]}, set its {@code test} attribute via
+     * {@code ghidra_trace_set_value}, and read the value back from the saved trace.
+     *
+     * @param <T> the Java type the caller expects; the cast is unchecked
+     * @param extra additional Python source inserted before the set_value call (may be empty)
+     * @param drgnExpr Python expression, inserted verbatim, giving the value to store
+     * @param gtype schema type name passed as the last argument of set_value
+     * @return the value stored at snap 0, or null if no value is present
+     */
+    @SuppressWarnings("unchecked")
+    protected <T> T runTestSetValue(String extra, String drgnExpr, String gtype)
+            throws Exception {
+        runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_create()
+                ghidra_trace_txstart('Create Object')
+                ghidra_trace_create_obj('Test.Objects[1]')
+                ghidra_trace_insert_obj('Test.Objects[1]')
+                %s
+                ghidra_trace_set_value('Test.Objects[1]', 'test', %s, '%s')
+                ghidra_trace_txcommit()
+                quit()
+                """.formatted(PREAMBLE, addr, extra, drgnExpr, gtype));
+        try (ManagedDomainObject mdo = openDomainObject(MDO)) {
+            tb = new ToyDBTraceBuilder((Trace) mdo.get());
+            TraceObject object = tb.trace.getObjectManager()
+                    .getObjectByCanonicalPath(KeyPath.parse("Test.Objects[1]"));
+            assertNotNull(object);
+            TraceObjectValue value = object.getValue(0, "test");
+            return value == null ? null : (T) value.getValue();
+        }
+    }
+
+    /** Setting None with type VOID must leave no value. */
+    @Test
+    public void testSetValueNull() throws Exception {
+        assertNull(runTestSetValue("", "None", "VOID"));
+    }
+
+    /** BOOL round-trips as a Boolean. */
+    @Test
+    public void testSetValueBool() throws Exception {
+        assertEquals(Boolean.TRUE, runTestSetValue("", "True", "BOOL"));
+    }
+
+    /** The string '1' with type BYTE is stored as the byte 1. */
+    @Test
+    public void testSetValueByte() throws Exception {
+        assertEquals(Byte.valueOf((byte) 1), runTestSetValue("", "'1'", "BYTE"));
+    }
+
+    /** CHAR round-trips as a Character. */
+    @Test
+    public void testSetValueChar() throws Exception {
+        assertEquals(Character.valueOf('A'), runTestSetValue("", "'A'", "CHAR"));
+    }
+
+    /** The string '1' with type SHORT is stored as the short 1. */
+    @Test
+    public void testSetValueShort() throws Exception {
+        assertEquals(Short.valueOf((short) 1), runTestSetValue("", "'1'", "SHORT"));
+    }
+
+    /** The string '1' with type INT is stored as the int 1. */
+    @Test
+    public void testSetValueInt() throws Exception {
+        assertEquals(Integer.valueOf(1), runTestSetValue("", "'1'", "INT"));
+    }
+
+    /** The string '1' with type LONG is stored as the long 1. */
+    @Test
+    public void testSetValueLong() throws Exception {
+        assertEquals(Long.valueOf(1), runTestSetValue("", "'1'", "LONG"));
+    }
+
+    /** STRING round-trips unchanged. */
+    @Test
+    public void testSetValueString() throws Exception {
+        assertEquals("HelloWorld!", runTestSetValue("", "\'HelloWorld!\'", "STRING"));
+    }
+
+    /** A u'' literal with type STRING yields the same string as a plain literal. */
+    @Test //- how do we input long strings in python
+    public void testSetValueStringWide() throws Exception {
+        assertEquals("HelloWorld!", runTestSetValue("", "u\'HelloWorld!\'", "STRING"));
+    }
+
+    /** BOOL_ARR round-trips as boolean[]. */
+    @Test
+    public void testSetValueBoolArr() throws Exception {
+        assertArrayEquals(new boolean[] { true, false },
+            runTestSetValue("", "[True,False]", "BOOL_ARR"));
+    }
+
+    /** A Python string given as BYTE_ARR is stored as its bytes. */
+    @Test
+    public void testSetValueByteArrUsingString() throws Exception {
+        assertArrayEquals(new byte[] { 'H', 1, 'W' },
+            runTestSetValue("", "'H\\1W'", "BYTE_ARR"));
+    }
+
+    /** A mixed list of one-character strings and ints given as BYTE_ARR is stored as bytes. */
+    @Test
+    public void testSetValueByteArrUsingArray() throws Exception {
+        assertArrayEquals(new byte[] { 'H', 0, 'W' },
+            runTestSetValue("", "['H',0,'W']", "BYTE_ARR"));
+    }
+
+    /** A Python string given as CHAR_ARR is stored as its characters. */
+    @Test
+    public void testSetValueCharArrUsingString() throws Exception {
+        assertArrayEquals(new char[] { 'H', 1, 'W' },
+            runTestSetValue("", "'H\\1W'", "CHAR_ARR"));
+    }
+
+    /** A mixed list given as CHAR_ARR is stored as chars. */
+    @Test
+    public void testSetValueCharArrUsingArray() throws Exception {
+        assertArrayEquals(new char[] { 'H', 0, 'W' },
+            runTestSetValue("", "['H',0,'W']", "CHAR_ARR"));
+    }
+
+    /** A Python string given as SHORT_ARR is stored as one short per character. */
+    @Test
+    public void testSetValueShortArrUsingString() throws Exception {
+        assertArrayEquals(new short[] { 'H', 1, 'W' },
+            runTestSetValue("", "'H\\1W'", "SHORT_ARR"));
+    }
+
+    /** A mixed list given as SHORT_ARR is stored as shorts. */
+    @Test
+    public void testSetValueShortArrUsingArray() throws Exception {
+        assertArrayEquals(new short[] { 'H', 0, 'W' },
+            runTestSetValue("", "['H',0,'W']", "SHORT_ARR"));
+    }
+
+    /** A mixed list given as INT_ARR is stored as ints. */
+    @Test
+    public void testSetValueIntArrayUsingMixedArray() throws Exception {
+        // The explicit array type decides the element type: one-character strings are
+        // expected to come back as their character codes, exactly three elements.
+        assertArrayEquals(new int[] { 'H', 0, 'W' },
+            runTestSetValue("", "['H',0,'W']", "INT_ARR"));
+    }
+
+    /** INT_ARR round-trips as int[]. */
+    @Test
+    public void testSetValueIntArrUsingArray() throws Exception {
+        assertArrayEquals(new int[] { 1, 2, 3, 4 },
+            runTestSetValue("", "[1,2,3,4]", "INT_ARR"));
+    }
+
+    /** LONG_ARR round-trips as long[]. */
+    @Test
+    public void testSetValueLongArr() throws Exception {
+        assertArrayEquals(new long[] { 1, 2, 3, 4 },
+            runTestSetValue("", "[1,2,3,4]", "LONG_ARR"));
+    }
+
+    /** STRING_ARR round-trips as String[]. */
+    @Test
+    public void testSetValueStringArr() throws Exception {
+        assertArrayEquals(new String[] { "1", "A", "dead", "beef" },
+            runTestSetValue("", "['1','A','dead','beef']", "STRING_ARR"));
+    }
+
+    /** An integer given as ADDRESS is stored as that offset in the "ram" space. */
+    @Test
+    public void testSetValueAddress() throws Exception {
+        Address address = runTestSetValue("", "0xdeadbeef", "ADDRESS");
+        // Don't have the address factory to create expected address
+        assertEquals(0xdeadbeefL, address.getOffset());
+        assertEquals("ram", address.getAddressSpace().getName());
+    }
+
+    /** A path string given as OBJECT is stored as a reference to that trace object. */
+    @Test
+    public void testSetValueObject() throws Exception {
+        TraceObject object = runTestSetValue("", "'Test.Objects[1]'", "OBJECT");
+        assertEquals("Test.Objects[1]", object.getCanonicalPath().toString());
+    }
+
+    /**
+     * After retaining only [1] and [3] at snap 10, element [2] must end at snap 9 while the
+     * retained elements stay open-ended.
+     */
+    @Test
+    public void testRetainValues() throws Exception {
+        runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_create()
+                ghidra_trace_txstart('Create Object')
+                ghidra_trace_create_obj('Test.Objects[1]')
+                ghidra_trace_insert_obj('Test.Objects[1]')
+                ghidra_trace_set_value('Test.Objects[1]', '[1]', '"A"', 'STRING')
+                ghidra_trace_set_value('Test.Objects[1]', '[2]', '"B"', 'STRING')
+                ghidra_trace_set_value('Test.Objects[1]', '[3]', '"C"', 'STRING')
+                ghidra_trace_set_snap(10)
+                ghidra_trace_retain_values('Test.Objects[1]', '[1] [3]')
+                ghidra_trace_txcommit()
+                quit()
+                """.formatted(PREAMBLE, addr));
+        try (ManagedDomainObject mdo = openDomainObject(MDO)) {
+            tb = new ToyDBTraceBuilder((Trace) mdo.get());
+            TraceObject object = tb.trace.getObjectManager()
+                    .getObjectByCanonicalPath(KeyPath.parse("Test.Objects[1]"));
+            assertNotNull(object);
+            assertEquals(Map.ofEntries(
+                Map.entry("[1]", Lifespan.nowOn(0)),
+                Map.entry("[2]", Lifespan.span(0, 9)),
+                Map.entry("[3]", Lifespan.nowOn(0))),
+                object.getValues(Lifespan.ALL)
+                        .stream()
+                        .collect(Collectors.toMap(v -> v.getEntryKey(), v -> v.getLifespan())));
+        }
+    }
+
+    /** {@code ghidra_trace_get_obj} must print the object's id and path, tab-separated. */
+    @Test
+    public void testGetObj() throws Exception {
+        String out = runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_start()
+                ghidra_trace_txstart('Create Object')
+                print('---Id---')
+                ghidra_trace_create_obj('Test.Objects[1]')
+                print('---')
+                ghidra_trace_txcommit()
+                print('---GetObject---')
+                ghidra_trace_get_obj('Test.Objects[1]')
+                print('---')
+                quit()
+                """.formatted(PREAMBLE, addr));
+        try (ManagedDomainObject mdo = openDomainObject("/New Traces/drgn/noname")) {
+            tb = new ToyDBTraceBuilder((Trace) mdo.get());
+            TraceObject object = tb.trace.getObjectManager()
+                    .getObjectByCanonicalPath(KeyPath.parse("Test.Objects[1]"));
+            assertNotNull(object);
+            assertEquals("1\tTest.Objects[1]", extractOutSection(out, "---GetObject---"));
+        }
+    }
+
+    /** {@code ghidra_trace_get_values} must list one row per value type that was set. */
+    @Test
+    public void testGetValues() throws Exception {
+        String out = runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_create()
+                ghidra_trace_txstart('Create Object')
+                ghidra_trace_create_obj('Test.Objects[1]')
+                ghidra_trace_insert_obj('Test.Objects[1]')
+                ghidra_trace_set_value('Test.Objects[1]', 'vnull', None, 'VOID')
+                ghidra_trace_set_value('Test.Objects[1]', 'vbool', True, 'BOOL')
+                ghidra_trace_set_value('Test.Objects[1]', 'vbyte', '1', 'BYTE')
+                ghidra_trace_set_value('Test.Objects[1]', 'vchar', 'A', 'CHAR')
+                ghidra_trace_set_value('Test.Objects[1]', 'vshort', '2', 'SHORT')
+                ghidra_trace_set_value('Test.Objects[1]', 'vint', '3', 'INT')
+                ghidra_trace_set_value('Test.Objects[1]', 'vlong', '4', 'LONG')
+                ghidra_trace_set_value('Test.Objects[1]', 'vstring', 'Hello', 'STRING')
+                vboolarr = [True, False]
+                ghidra_trace_set_value('Test.Objects[1]', 'vboolarr', vboolarr, 'BOOL_ARR')
+                vbytearr = [1, 2, 3]
+                ghidra_trace_set_value('Test.Objects[1]', 'vbytearr', vbytearr, 'BYTE_ARR')
+                vchararr = 'Hello'
+                ghidra_trace_set_value('Test.Objects[1]', 'vchararr', vchararr, 'CHAR_ARR')
+                vshortarr = [1, 2, 3]
+                ghidra_trace_set_value('Test.Objects[1]', 'vshortarr', vshortarr, 'SHORT_ARR')
+                vintarr = [1, 2, 3]
+                ghidra_trace_set_value('Test.Objects[1]', 'vintarr', vintarr, 'INT_ARR')
+                vlongarr = [1, 2, 3]
+                ghidra_trace_set_value('Test.Objects[1]', 'vlongarr', vlongarr, 'LONG_ARR')
+                ghidra_trace_set_value('Test.Objects[1]', 'vaddr', 0xdeadbeef, 'ADDRESS')
+                ghidra_trace_set_value('Test.Objects[1]', 'vobj', 'Test.Objects[1]', 'OBJECT')
+                ghidra_trace_txcommit()
+                print('---GetValues---')
+                ghidra_trace_get_values('Test.Objects[1].')
+                print('---')
+                quit()
+                """.formatted(PREAMBLE, addr));
+        try (ManagedDomainObject mdo = openDomainObject(MDO)) {
+            tb = new ToyDBTraceBuilder((Trace) mdo.get());
+            // NOTE(review): the columns below are separated by single spaces as found; if the
+            // command pads its columns, the padding was lost and must be restored -- confirm
+            // against the actual output of ghidra_trace_get_values.
+            assertEquals("""
+                    Parent Key Span Value Type
+                    Test.Objects[1] vaddr [0,+inf) ram:deadbeef ADDRESS
+                    Test.Objects[1] vbool [0,+inf) True BOOL
+                    Test.Objects[1] vboolarr [0,+inf) [True, False] BOOL_ARR
+                    Test.Objects[1] vbyte [0,+inf) 1 BYTE
+                    Test.Objects[1] vbytearr [0,+inf) b'\\x01\\x02\\x03' BYTE_ARR
+                    Test.Objects[1] vchar [0,+inf) 'A' CHAR
+                    Test.Objects[1] vchararr [0,+inf) 'Hello' CHAR_ARR
+                    Test.Objects[1] vint [0,+inf) 3 INT
+                    Test.Objects[1] vintarr [0,+inf) [1, 2, 3] INT_ARR
+                    Test.Objects[1] vlong [0,+inf) 4 LONG
+                    Test.Objects[1] vlongarr [0,+inf) [1, 2, 3] LONG_ARR
+                    Test.Objects[1] vobj [0,+inf) Test.Objects[1] OBJECT
+                    Test.Objects[1] vshort [0,+inf) 2 SHORT
+                    Test.Objects[1] vshortarr [0,+inf) [1, 2, 3] SHORT_ARR
+                    Test.Objects[1] vstring [0,+inf) 'Hello' STRING""",
+                extractOutSection(out, "---GetValues---").replaceAll("\r", ""));
+        }
+    }
+
+    /** {@code ghidra_trace_get_values_rng} must find the address-valued entry in the range. */
+    @Test
+    public void testGetValuesRng() throws Exception {
+        String out = runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_create()
+                ghidra_trace_txstart('Create Object')
+                ghidra_trace_create_obj('Test.Objects[1]')
+                ghidra_trace_insert_obj('Test.Objects[1]')
+                ghidra_trace_set_value('Test.Objects[1]', 'vaddr', 0xdeadbeef, 'ADDRESS')
+                ghidra_trace_txcommit()
+                print('---GetValues---')
+                ghidra_trace_get_values_rng(0xdeadbeef, 10)
+                print('---')
+                quit()
+                """.formatted(PREAMBLE, addr));
+        try (ManagedDomainObject mdo = openDomainObject(MDO)) {
+            tb = new ToyDBTraceBuilder((Trace) mdo.get());
+            // NOTE(review): column padding may have been lost here as well -- confirm.
+            assertEquals("""
+                    Parent Key Span Value Type
+                    Test.Objects[1] vaddr [0,+inf) ram:deadbeef ADDRESS""",
+                extractOutSection(out, "---GetValues---").replaceAll("\r", ""));
+        }
+    }
+
+    /** Activating an object must make its trace and the object current in the trace manager. */
+    @Test
+    public void testActivateObject() throws Exception {
+        runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_create()
+                #set language c++
+                ghidra_trace_txstart('Create Object')
+                ghidra_trace_create_obj('Test.Objects[1]')
+                ghidra_trace_insert_obj('Test.Objects[1]')
+                ghidra_trace_txcommit()
+                ghidra_trace_activate('Test.Objects[1]')
+                quit()
+                """.formatted(PREAMBLE, addr));
+        try (ManagedDomainObject mdo = openDomainObject(MDO)) {
+            assertSame(mdo.get(), traceManager.getCurrentTrace());
+            assertEquals("Test.Objects[1]",
+                traceManager.getCurrentObject().getCanonicalPath().toString());
+        }
+    }
+
+    /**
+     * Run putmem and disassemble at the program counter. With the current fixture nothing is
+     * expected to be disassembled and the command is expected to print nothing.
+     */
+    @Test
+    public void testDisassemble() throws Exception {
+        String out = runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_create()
+                ghidra_trace_txstart('Tx')
+                pc = get_pc()
+                ghidra_trace_putmem(pc, 16)
+                print('---Disassemble---')
+                ghidra_trace_disassemble(pc)
+                print('---')
+                ghidra_trace_txcommit()
+                quit()
+                """.formatted(PREAMBLE, addr));
+        try (ManagedDomainObject mdo = openDomainObject(MDO)) {
+            tb = new ToyDBTraceBuilder((Trace) mdo.get());
+            // Sum the lengths of all code units defined at snap 0
+            long total = 0;
+            for (CodeUnit cu : tb.trace.getCodeManager().definedUnits().get(0, true)) {
+                total += cu.getLength();
+            }
+            String extract = extractOutSection(out, "---Disassemble---");
+            String[] split = extract.split("\r\n");
+            // NB: core.12137 has no memory
+            //assertEquals("Disassembled %d bytes".formatted(total),
+            //    split[0]);
+            assertEquals(0, total);
+            assertEquals("", split[0]);
+        }
+    }
+
+    /** {@code ghidra_trace_put_processes} is expected to record no elements under Processes. */
+    @Test
+    public void testPutProcesses() throws Exception {
+        runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_start()
+                ghidra_trace_txstart('Tx')
+                ghidra_trace_put_processes()
+                ghidra_trace_txcommit()
+                quit()
+                """.formatted(PREAMBLE, addr));
+        try (ManagedDomainObject mdo = openDomainObject("/New Traces/drgn/noname")) {
+            tb = new ToyDBTraceBuilder((Trace) mdo.get());
+            // Would be nice to control / validate the specifics
+            Collection<TraceObject> processes = tb.trace.getObjectManager()
+                    .getValuePaths(Lifespan.at(0), PathFilter.parse("Processes[]"))
+                    .map(p -> p.getDestination(null))
+                    .toList();
+            assertEquals(0, processes.size());
+        }
+    }
+
+    /** The Environment object must report debugger, architecture, language and endianness. */
+    @Test
+    public void testPutEnvironment() throws Exception {
+        runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_create()
+                ghidra_trace_txstart('Tx')
+                ghidra_trace_put_environment()
+                ghidra_trace_txcommit()
+                quit()
+                """.formatted(PREAMBLE, addr));
+        try (ManagedDomainObject mdo = openDomainObject(MDO)) {
+            tb = new ToyDBTraceBuilder((Trace) mdo.get());
+            // Assumes drgn is attached to a little-endian x86-64 target
+            TraceObject envobj =
+                Objects.requireNonNull(tb.objAny("Processes[].Environment", Lifespan.at(0)));
+            assertEquals("drgn", envobj.getValue(0, "_debugger").getValue());
+            assertEquals("X86_64", envobj.getValue(0, "_arch").getValue());
+            assertEquals("Language.C", envobj.getValue(0, "_os").getValue());
+            assertEquals("little", envobj.getValue(0, "_endian").getValue());
+        }
+    }
+
+    /** {@code ghidra_trace_put_regions} must record more than two memory regions. */
+    @Test
+    public void testPutRegions() throws Exception {
+        runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_create()
+                ghidra_trace_txstart('Tx')
+                ghidra_trace_put_regions()
+                ghidra_trace_txcommit()
+                quit()
+                """.formatted(PREAMBLE, addr));
+        try (ManagedDomainObject mdo = openDomainObject(MDO)) {
+            tb = new ToyDBTraceBuilder((Trace) mdo.get());
+            // Would be nice to control / validate the specifics
+            Collection<? extends TraceMemoryRegion> all =
+                tb.trace.getMemoryManager().getAllRegions();
+            assertThat(all.size(), greaterThan(2));
+        }
+    }
+
+    /** Exactly one module whose name contains "helloWorld" must exist, with a non-zero base. */
+    @Test
+    public void testPutModules() throws Exception {
+        runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_create()
+                ghidra_trace_txstart('Tx')
+                ghidra_trace_put_modules()
+                ghidra_trace_txcommit()
+                quit()
+                """.formatted(PREAMBLE, addr));
+        try (ManagedDomainObject mdo = openDomainObject(MDO)) {
+            tb = new ToyDBTraceBuilder((Trace) mdo.get());
+            // Would be nice to control / validate the specifics
+            Collection<? extends TraceModule> all = tb.trace.getModuleManager().getAllModules();
+            TraceModule modHello =
+                Unique.assertOne(all.stream().filter(m -> m.getName().contains("helloWorld")));
+            assertNotEquals(tb.addr(0), Objects.requireNonNull(modHello.getBase()));
+        }
+    }
+
+    /** {@code ghidra_trace_put_threads} must record exactly one thread. */
+    @Test
+    public void testPutThreads() throws Exception {
+        runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_create()
+                ghidra_trace_txstart('Tx')
+                ghidra_trace_put_threads()
+                ghidra_trace_txcommit()
+                quit()
+                """.formatted(PREAMBLE, addr));
+        try (ManagedDomainObject mdo = openDomainObject(MDO)) {
+            tb = new ToyDBTraceBuilder((Trace) mdo.get());
+            // Would be nice to control / validate the specifics
+            Collection<? extends TraceThread> threads = tb.trace.getThreadManager().getAllThreads();
+            assertEquals(1, threads.size());
+        }
+    }
+
+    /** {@code ghidra_trace_put_frames} must record seven stack frames for process 0. */
+    @Test
+    public void testPutFrames() throws Exception {
+        runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                ghidra_trace_create()
+                ghidra_trace_txstart('Tx')
+                ghidra_trace_put_frames()
+                ghidra_trace_txcommit()
+                quit()
+                """.formatted(PREAMBLE, addr));
+        try (ManagedDomainObject mdo = openDomainObject(MDO)) {
+            tb = new ToyDBTraceBuilder((Trace) mdo.get());
+            // Would be nice to control / validate the specifics
+            List<TraceObject> stack = tb.trace.getObjectManager()
+                    .getValuePaths(Lifespan.at(0),
+                        PathFilter.parse("Processes[0].Threads[].Stack[]"))
+                    .map(p -> p.getDestination(null))
+                    .toList();
+            assertEquals(7, stack.size());
+        }
+    }
+
+    /** Smallest possible session: connect, print a marker, quit. */
+    @Test
+    public void testMinimal() throws Exception {
+        runThrowError(addr -> """
+                %s
+                ghidra_trace_connect('%s')
+                print('FINISHED')
+                quit()
+                """.formatted(PREAMBLE, addr));
+    }
+
+}
diff --git
a/Ghidra/Test/DebuggerIntegrationTest/src/test.slow/java/agent/drgn/rmi/DrgnMethodsTest.java b/Ghidra/Test/DebuggerIntegrationTest/src/test.slow/java/agent/drgn/rmi/DrgnMethodsTest.java new file mode 100644 index 00000000000..a7329749d06 --- /dev/null +++ b/Ghidra/Test/DebuggerIntegrationTest/src/test.slow/java/agent/drgn/rmi/DrgnMethodsTest.java @@ -0,0 +1,286 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package agent.drgn.rmi; + +import static org.hamcrest.Matchers.*; +import static org.junit.Assert.*; + +import java.util.*; + +import org.junit.Test; + +import generic.Unique; +import generic.jar.ResourceFile; +import ghidra.app.plugin.core.debug.utils.ManagedDomainObject; +import ghidra.debug.api.tracermi.RemoteMethod; +import ghidra.framework.Application; +import ghidra.program.model.address.AddressSpace; +import ghidra.program.model.lang.RegisterValue; +import ghidra.trace.database.ToyDBTraceBuilder; +import ghidra.trace.model.Lifespan; +import ghidra.trace.model.Trace; +import ghidra.trace.model.memory.TraceMemoryRegion; +import ghidra.trace.model.memory.TraceMemorySpace; +import ghidra.trace.model.modules.TraceModule; +import ghidra.trace.model.target.TraceObject; +import ghidra.trace.model.target.path.PathFilter; +import ghidra.trace.model.target.path.PathPattern; + +public class DrgnMethodsTest extends AbstractDrgnTraceRmiTest { + + @Test + public void testExecuteCapture() throws Exception { + try 
(PythonAndConnection conn = startAndConnectDrgn()) { + RemoteMethod execute = conn.getMethod("execute"); + assertEquals(false, execute.parameters().get("to_string").getDefaultValue()); + assertEquals("11\n", + execute.invoke(Map.of( + "cmd", "print(3+4*2)", + "to_string", true))); + } + } + + @Test + public void testExecute() throws Exception { + try (PythonAndConnection conn = startAndConnectDrgn()) { + start(conn, null); + } + try (ManagedDomainObject mdo = openDomainObject(MDO)) { + // Just confirm it's present + } + } + + @Test + public void testRefreshProcesses() throws Exception { + try (PythonAndConnection conn = startAndConnectDrgn()) { + start(conn, null); + txCreate(conn, "Processes"); + + RemoteMethod attachCore = conn.getMethod("attach_core"); + RemoteMethod refreshProcesses = conn.getMethod("refresh_processes"); + try (ManagedDomainObject mdo = openDomainObject(MDO)) { + tb = new ToyDBTraceBuilder((Trace) mdo.get()); + TraceObject processes = Objects.requireNonNull(tb.objAny0("Processes")); + + refreshProcesses.invoke(Map.of("node", processes)); + + List list = tb.trace.getObjectManager() + .getValuePaths(Lifespan.at(getMaxSnap()), PathFilter.parse("Processes[]")) + .map(p -> p.getDestination(null)) + .toList(); + assertEquals(1, list.size()); + + ResourceFile rf = Application.getModuleDataFile("TestResources", CORE); + attachCore.invoke(Map.of("processes", processes, "core", rf.getAbsolutePath())); + refreshProcesses.invoke(Map.of("node", processes)); + + list = tb.trace.getObjectManager() + .getValuePaths(Lifespan.at(getMaxSnap()), PathFilter.parse("Processes[]")) + .map(p -> p.getDestination(null)) + .toList(); + assertEquals(2, list.size()); + + } + } + } + + @Test + public void testRefreshEnvironment() throws Exception { + try (PythonAndConnection conn = startAndConnectDrgn()) { + String path = "Processes[].Environment"; + start(conn, null); + txPut(conn, "all"); + + RemoteMethod refreshEnvironment = conn.getMethod("refresh_environment"); + try 
(ManagedDomainObject mdo = openDomainObject(MDO)) { + tb = new ToyDBTraceBuilder((Trace) mdo.get()); + TraceObject envobj = Objects.requireNonNull(tb.objAny0(path)); + + refreshEnvironment.invoke(Map.of("node", envobj)); + + assertEquals("drgn", envobj.getValue(0, "_debugger").getValue()); + assertEquals("X86_64", envobj.getValue(0, "_arch").getValue()); + assertEquals("Language.C", envobj.getValue(0, "_os").getValue()); + assertEquals("little", envobj.getValue(0, "_endian").getValue()); + } + } + } + + @Test + public void testRefreshThreads() throws Exception { + try (PythonAndConnection conn = startAndConnectDrgn()) { + String path = "Processes[].Threads"; + start(conn, null); + txCreate(conn, path); + + RemoteMethod refreshThreads = conn.getMethod("refresh_threads"); + try (ManagedDomainObject mdo = openDomainObject(MDO)) { + tb = new ToyDBTraceBuilder((Trace) mdo.get()); + TraceObject threads = Objects.requireNonNull(tb.objAny0(path)); + + refreshThreads.invoke(Map.of("node", threads)); + + int listSize = tb.trace.getThreadManager().getAllThreads().size(); + assertEquals(1, listSize); + } + } + } + + @Test + public void testRefreshStack() throws Exception { + try (PythonAndConnection conn = startAndConnectDrgn()) { + String path = "Processes[].Threads[].Stack"; + start(conn, null); + txPut(conn, "processes"); + + RemoteMethod refreshStack = conn.getMethod("refresh_stack"); + try (ManagedDomainObject mdo = openDomainObject(MDO)) { + tb = new ToyDBTraceBuilder((Trace) mdo.get()); + + txPut(conn, "frames"); + TraceObject stack = Objects.requireNonNull(tb.objAny0(path)); + refreshStack.invoke(Map.of("node", stack)); + + // Would be nice to control / validate the specifics + List list = tb.trace.getObjectManager() + .getValuePaths(Lifespan.at(0), + PathFilter.parse("Processes[].Threads[].Stack[]")) + .map(p -> p.getDestination(null)) + .toList(); + assertEquals(7, list.size()); + } + } + } + + @Test + public void testRefreshRegisters() throws Exception { + try 
(PythonAndConnection conn = startAndConnectDrgn()) { + String path = "Processes[].Threads[].Stack[].Registers"; + start(conn, null); + conn.execute("ghidra_trace_txstart('Tx')"); + conn.execute("ghidra_trace_putreg()"); + conn.execute("ghidra_trace_delreg()"); + conn.execute("ghidra_trace_txcommit()"); + + RemoteMethod refreshRegisters = conn.getMethod("refresh_registers"); + try (ManagedDomainObject mdo = openDomainObject(MDO)) { + tb = new ToyDBTraceBuilder((Trace) mdo.get()); + + TraceObject registers = Objects.requireNonNull(tb.objAny(path, Lifespan.at(0))); + refreshRegisters.invoke(Map.of("node", registers)); + + long snap = 0; + AddressSpace t1f0 = tb.trace.getBaseAddressFactory() + .getAddressSpace(registers.getCanonicalPath().toString()); + TraceMemorySpace regs = tb.trace.getMemoryManager().getMemorySpace(t1f0, false); + RegisterValue rip = regs.getValue(snap, tb.reg("rip")); + assertEquals("3a40cdf7ff7f0000", rip.getUnsignedValue().toString(16)); + } + } + } + + @Test + public void testRefreshMappings() throws Exception { + try (PythonAndConnection conn = startAndConnectDrgn()) { + String path = "Processes[].Memory"; + start(conn, null); + txCreate(conn, path); + + RemoteMethod refreshMappings = conn.getMethod("refresh_mappings"); + try (ManagedDomainObject mdo = openDomainObject(MDO)) { + tb = new ToyDBTraceBuilder((Trace) mdo.get()); + TraceObject memory = Objects.requireNonNull(tb.objAny0(path)); + + refreshMappings.invoke(Map.of("node", memory)); + + // Would be nice to control / validate the specifics + Collection all = + tb.trace.getMemoryManager().getAllRegions(); + assertThat(all.size(), greaterThan(2)); + } + } + } + + @Test + public void testRefreshModules() throws Exception { + try (PythonAndConnection conn = startAndConnectDrgn()) { + String path = "Processes[].Modules"; + start(conn, null); + txCreate(conn, path); + + RemoteMethod refreshModules = conn.getMethod("refresh_modules"); + try (ManagedDomainObject mdo = openDomainObject(MDO)) { + 
tb = new ToyDBTraceBuilder((Trace) mdo.get()); + TraceObject modules = Objects.requireNonNull(tb.objAny0(path)); + + refreshModules.invoke(Map.of("node", modules)); + + // Would be nice to control / validate the specifics + Collection all = tb.trace.getModuleManager().getAllModules(); + TraceModule mod = + Unique.assertOne(all.stream().filter(m -> m.getName().contains("helloWorld"))); + assertNotEquals(tb.addr(0), Objects.requireNonNull(mod.getBase())); + } + } + } + + @Test + public void testActivateThread() throws Exception { + try (PythonAndConnection conn = startAndConnectDrgn()) { + start(conn, null); + txPut(conn, "processes"); + + RemoteMethod activateThread = conn.getMethod("activate_thread"); + try (ManagedDomainObject mdo = openDomainObject(MDO)) { + tb = new ToyDBTraceBuilder((Trace) mdo.get()); + + txPut(conn, "threads"); + + PathPattern pattern = + PathFilter.parse("Processes[].Threads[]").getSingletonPattern(); + List list = tb.trace.getObjectManager() + .getValuePaths(Lifespan.at(0), pattern) + .map(p -> p.getDestination(null)) + .toList(); + assertEquals(1, list.size()); + + for (TraceObject t : list) { + activateThread.invoke(Map.of("thread", t)); + String out = conn.executeCapture("print(util.selected_thread())").strip(); + List indices = pattern.matchKeys(t.getCanonicalPath(), true); + assertEquals("%s".formatted(indices.get(1)), out); + } + } + } + } + + private void start(PythonAndConnection conn, String obj) { + conn.execute("from ghidradrgn.commands import *"); + conn.execute("ghidra_trace_create()"); + } + + private void txPut(PythonAndConnection conn, String obj) { + conn.execute("ghidra_trace_txstart('Tx')"); + conn.execute("ghidra_trace_put_" + obj + "()"); + conn.execute("ghidra_trace_txcommit()"); + } + + private void txCreate(PythonAndConnection conn, String path) { + conn.execute("ghidra_trace_txstart('Fake')"); + conn.execute("ghidra_trace_create_obj('%s')".formatted(path)); + conn.execute("ghidra_trace_txcommit()"); + } +} diff 
--git a/GhidraDocs/GhidraClass/Debugger/B5-AddingDebuggers.html b/GhidraDocs/GhidraClass/Debugger/B5-AddingDebuggers.html new file mode 100644 index 00000000000..afbd83e2809 --- /dev/null +++ b/GhidraDocs/GhidraClass/Debugger/B5-AddingDebuggers.html @@ -0,0 +1,148 @@ + + + + + + + Ghidra Debugger + + + + + +
+

Ghidra Debugger

+
+ +
+

Adding a debugger

+

This module walks you through an example of how to add a debugger agent to Ghidra. It has no exercises and is certainly not the only way to implement an agent, but hopefully contains some useful pointers and highlights some pit-falls that you might encounter. The example traces the implementation of an actual agent — the agent for Meta’s drgn debugger, which provides a scriptable, albeit read-only, interface to the running Linux kernel, as well as user-mode and core-dump targets.

+
+

Debugger documentation

+ +
+
+

Anatomy of a Ghidra debugger agent

+

To support debugging on various platforms, the Ghidra debugger has agents, i.e. clients capable of receiving information from a native debugger and passing it to the Ghidra GUI. They include the dbgeng agent that supports Windows debuggers, the gdb agent for gdb on a variety of platforms, the lldb agent for macOS and Linux, and the jpda agent for Java. All but the last are written in Python 3, and all communicate with the GUI via a protobuf-based protocol described in Debugger-rmi-trace.

+

At the highest level, each agent has four elements (ok, a somewhat arbitrary division, but…):

+
    +
  • debugger-launchers – A set of launchers, often a mixture of .bat, .sh, and sometimes .py scripts
  • +
  • schema.xml – An object-model schema. (While expressed in XML, this is not an “XML schema”.)
  • +
  • src/ghidradrgn – Python files for architecture, commands, hooks, methods, and common utility functions
  • +
  • build.gradle – Build logic
  • +
+

Large portions of each are identical or similar across agents, so, as a general strategy, copying an existing agent and renaming all agent-specific variables, methods, etc. is not the worst plan of action. Typically, this leads to large chunks of detritus that need to be edited out late in the development process.

+
+
+

drgn as an Example

+
+

The first launcher — local-drgn.sh

+

The initial objective is to create a shell that sets up the environment variables for parameters we’ll need and invokes the target. For this project, I originally started duplicating the lldb agent and then switched to the dbgeng agent. Why? The hardest part of writing an agent is getting the initial launch pattern correct. drgn is itself written in Python. While gdb and lldb support Python as scripting languages, their cores are not Python-based. For these debuggers, the launcher runs the native debugger and instructs it to load our plugin, which is the agent. The dbgeng agent inverts this pattern, i.e. the agent is a Python application that uses the Pybag package to access the native kd interface over COM. drgn follows this pattern.

+

That said, a quick look at the launchers in the dbgeng project (under debugger-launchers) shows .bat files, each of which calls a .py file in data/support. As drgn is a Linux-only debugger, we need to convert the .bat examples to .sh. Luckily, the conversion is pretty simple: most line annotations use # in place of :: and environment variables are referenced using $VAR in place of %VAR%.

+

The syntax of the .sh is typical of any *nix shell. In addition to the shell script, a launcher includes a metadata header to populate its menu and options dialog. Annotations include:

+
    +
  • A #! line for the shell invocation
  • +
  • The Ghidra license
  • +
  • A #@title line for the launcher name
  • +
  • A #@desc-annotated HTML description, as displayed in the launch dialog
  • +
  • #@menu-group for organizing launchers
  • +
  • #@icon for an icon
  • +
  • #@help the help file and anchor
  • +
  • Some number of #@arg variables, usually only one to name the executable image
  • +
  • #@args specifies the remainder of the arguments, passed to a user-mode target if applicable
  • +
  • Some number of #@env variables referenced by the Python code
  • +
+

While the drgn launcher does not use @arg or @args, there are plentiful examples in the gdb project. The #@env lines are composed of the variable name (usually in caps), its type, default value, a label for the dialog if the user needs to be queried, and a description. The syntax looks like:

+
    +
  • #@env Name : Type [ ! ] = DefaultValue Label Description
  • +
+

where !, if present, indicates the option is required.

+

For drgn, invoking the drgn command directly saves us a lot of the work involved in getting the environment correct. We pass it our Python launcher local-drgn.py instead of allowing it to call run_interactive, which does not return. Instead, we create an instance of prog based on the parameters, complete the Ghidra-specific initialization, and call run_interactive(prog) ourselves.

+

The Python script needs to do the setup work for Ghidra and for drgn. A good start is to try to implement a script that calls the methods for connect, create, and start, with create doing as little as possible initially. This should allow you to work the kinks out of arch.py and util.py.

+

For this particular target, there are some interesting wrinkles surrounding the use of sudo (required for most targets) which complicate where wheels are installed (i.e. it is pretty easy to accidentally mix user-local and system site-packages). Additionally, the -E parameter is required to ensure that the environment variables we defined get passed to the root environment. In the cases where we use sudo, the first message printed in the interactive shell will be the request for the user’s password.

+
+
+

The schema

+

The schema, specified in schema.xml, provides a basic structure for Ghidra’s Model View and allows Ghidra to identify and locate various interfaces that are used to populate the GUI. For example, the Memory interface identifies the container for items with the interface MemoryRegion, which provide information used to fill the Memory View. Among the important interfaces are Process, Thread, Frame, Register, MemoryRegion, Module, and Section. These interfaces are “built into” Ghidra so that it can identify which objects provide specific information and commands.

+

For the purposes of getting started, it’s easiest to clone the dbgeng schema and modify it as needed. Again, this will require substantial cleanup later on, but, as schema errors are frequently subtle and hard to identify, revisiting is probably the better approach. MANIFEST.in should be modified to reflect the schema’s path.

+
+
+

The build logic

+

Similarly, build.gradle can essentially be cloned from dbgeng, with the appropriate change to eclipse.project.name. For the most part, you need only apply the distributableGhidraModule.gradle and hasPythonPackage.gradle scripts. If further customization is needed, consult other examples in the Ghidra project and Gradle’s documentation.

+

Not perhaps directly a build logic item, but pyproject.toml should be modified to reflect the agent’s version number (by convention, Ghidra’s version number).

+
+
+

The Python files

+

At this point, we can start actually implementing the drgn agent. arch.py is usually a good starting point, as much of the initial logic depends on it. For arch.py, the hard bit is knowing what maps to what. The language_map converts the debugger’s self-reported architecture to Ghidra’s language set. Ghidra’s languages are mapped to a set of language-to-compiler maps, which are then used to map the debugger’s self-reported language to Ghidra’s compiler. Certain combinations are not allowed because Ghidra has no concept of that language-compiler combination. For example, x86 languages never map to default. Hence, the need for a x86_compiler_map, which defaults to something else (in this case, gcc).

+

After arch.py, a first pass at util.py is probably warranted. In particular, the version info is used early in the startup process. A lot of this code is not relevant to our current project, but at a minimum we want to implement (or fake out) methods such as selected_process, selected_thread, and selected_frame. In this example, there probably won’t be more than one session or one process. Ultimately, we’ll have to decide whether we even want Session in the schema. For now, we’re defaulting session and process to 0, and thread to 1, as 0 is invalid for debugging the kernel. (Later, it becomes obvious that the attached pid and prog.main_thread().tid make sense for user-mode debugging, and prog.crashed_thread().tid makes sense for crash dump debugging.)

+

With arch.py and util.py good to a first approximation, we would normally start implementing put methods in commands.py for various objects in the Model View, starting at the root of the tree and descending through the children. Again, Session and Process are rather poorly-defined, so we skip them (leaving one each) and tackle Threads. Typically, for each iterator in the debugger API, two commands get implemented — one internal method that does the actual work, e.g. put_threads() and one invokable method that wraps this method in a (potentially batched) transaction, e.g. ghidra_trace_put_threads(). The internal methods are meant to be called by other Python code, with the caller assumed to be responsible for setting up the transaction. The ghidra_trace-prefixed methods are meant to be part of the custom CLI command set which the user can invoke and therefore should set up the transaction. The internal method typically creates the path to the container using patterns for the container, individual keys, and the combination, e.g. THREADS_PATTERN, THREAD_KEY_PATTERN, and THREAD_PATTERN. Patterns are built up from other patterns, going back to the root. A trace object corresponding to the debugger object is created from the path and inserted into the trace database.

+

Once this code has been tested, attributes of the object can be added to the base object using set_value. Attributes that are not primitives can be added using the pattern create-populate-insert, i.e. we call create_object with extensions to the path, populate the object’s children, and call insert with the created object. In many cases (particularly when populating an object’s children is expensive), you may want to defer the populate step, effectively creating a placeholder that can be populated on-demand. The downside of this approach, of course, is that refresh methods must be added to populate those nodes.

+

As an aside, it’s probably worth noting the function of create_object and insert. Objects in the trace are maintained in a directory tree, with links (and backlinks) allowed, whose visible manifestation is the Model View. As such, operations on the tree follow the normal procedure for operations on a graph. create_object creates a node but not any edges, not even the implied (“canonical”) edge from parent to child. insert creates the canonical edge. Until that edge exists, the object is not considered to be “alive”, so the lifespan of the edge effectively encodes the object’s life. Following the create-populate-insert pattern minimizes the number of events that need to be processed.

+

Having completed a single command, we can proceed in one of two directions — we can continue implementing commands for other objects in the tree, or we can implement matching refresh methods in methods.py for the completed object. methods.py also requires patterns which are used to match a path to a trace object, usually via find_x_by_pattern methods. The refresh methods may or may not rely on the find_by methods depending on whether the matching command needs parameters. For example, we may want to assume the selected_thread matches the current object in the view, in which case it can be used to locate that node, or we may want to force the method to match on the node if the trace object can be easily matched to the debugger object, or we may want to use the node to set selected_thread.

+

The concept of focus in the debugger is fairly complicated and a frequent source of confusion. In general, we use selected to represent the GUI’s current focus, typically the node in the Model or associated views which the user has selected. In some sense, it represents the process, thread, or frame the user is interested in. It also may differ from the highlighted node, chosen by a single-click (versus a double-click which sets the selection). By contrast, the native debugger has its own idea of focus, which we usually describe as current. (This concept is itself complicated by distinctions between the event object, e.g. which thread the debugger broke on, and the current object, e.g. which thread is being inspected.) Current values are pushed “up” to Ghidra’s GUI from the native debugger; selected values are pushed “down” to the native debugger from Ghidra. To the extent possible, it makes sense to synchronize these values. In other words, in most cases, a new selection should force a change in the set of current objects, and an event signaling a change in the current object should alter the GUI’s set of selected objects. (Of course, care needs to be taken not to make this a round-trip cycle.)

+

refresh methods (and others) are often annotated in several ways. The @REGISTRY.method annotation makes the method available to the GUI. It specifies the action to be taken and the display that appears in the GUI pop-up menu. Actions may be purely descriptive or may correspond to built-in actions taken by the GUI, e.g. refresh and many of the control methods, such as step_into. Parameters for the methods may be annotated with sch.Schema (conventionally on the first parameter) to indicate the nodes to which the method applies, and with ParamDesc to describe the parameter’s type and label for pop-up dialogs. After retrieving necessary parameters, refresh methods invoke methods from commands.py wrapped in a transaction.

+

For drgn, we implemented put/refresh methods for threads, frames, registers (putreg), and local variables, then modules and sections, memory and regions, the environment, and finally processes. We also implemented putmem using drgn’s read API. Symbols was another possibility, but, for the moment, populating symbols seemed too expensive. Instead, retrieve_symbols was added to allow per-pattern symbols to be added. Unfortunately, the drgn API doesn’t support wildcards, so eventually some other strategy will be necessary.

+

The remaining set of Python functions, hooks.py, comprises callbacks for various events sent by the native debugger. The current drgn code has no event system. A set of skeletal methods has been left in place as (a) we can use the single-step button as a stand-in for “update state”, and (b) some discussion exists in the drgn user forums regarding eventually implementing more control functionality. For anyone implementing hooks.py, the challenging logic resides in the event loop, particularly if there is a need to move back-and-forth between the debugger and a repl. Also, distinctions need to be made between control commands, which wait for events, and commands which rely on a callback but complete immediately. As a rule-of-thumb, we push to Ghidra, i.e. Ghidra issues requests asynchronously and the agent must update the trace database.

+
+
+

Revisiting the schema

+

At this point, revisiting and editing the schema may be called for. For example, for drgn, it’s not obvious that there can ever be more than one session, so it may be cleaner to embed Processes at the root. This, in turn, requires editing the commands.py and methods.py patterns. Similarly, as breakpoints are not supported, the breakpoint-related entries may safely be deleted.

+

In general, the schema can be structured however you like, but there are several details worth mentioning. Interfaces generally need to be respected for various functions in the GUI to work. Process, thread, frame, module, section, and memory elements can be named arbitrarily, but their interfaces must be named correctly. Additionally, the logic for finding objects in the tree is quite complicated. If elements need be traversed as part of the default search process, their containers must be tagged canonical. If attributes need to be traversed, their parents should have the interface Aggregate.

+

Each entry may have elements of the same type ordered by keys, and attributes of arbitrary type. The element entry describes the schema for all elements; the schema for attributes may be given explicitly using named attribute entries or defaulted using the unnamed attribute entry, typically <attribute schema="VOID"> or <attribute schema="ANY">. The schema for any element in the Model View is visible using the hover, which helps substantially when trying to identify schema traversal errors.

+

Schema entries may be marked hidden=yes with the obvious result. Additionally, certain attribute names and schema have special properties. For example, _display defines the visible ID for an entry in the Model tree, and ADDRESS and RANGE mark attributes which are navigable.

+
+
+

Unit tests

+

The hardest part of writing unit tests is almost always getting the first test to run, and the easiest unit tests, as with the Python files, are those for commands.py. For drgn, as before, we’re using dbgeng as the pattern, but several elements had to be changed. Because the launchers execute a script, we need to amend the runThrowError logic (and, more specifically, the execInPython logic) in AbstractDrgnTraceRmiTest with a ProcessBuilder call that takes a script, rather than writing the script to stdin. While there, we can also trim out the unnecessary helper logic around items like breakpoints, watchpoints, etc. from all of the test classes.

+

JUnits for methods.py follow a similar pattern, but, again, getting the first one to run is often the most difficult. For drgn, we’ve had to override the timeouts in waitForPass and waitForCondition. After starting with hardcoded paths for the test target, we also had to add logic to re-write the PREAMBLE on-the-fly in execInDrgn. Obviously, with no real hooks.py logic, there’s no need for DrgnHooksTest.

+

Of note, we’ve used the gdb gcore command to create a core dump for the tests. Both user- and kernel-mode require privileges to run the debugger, and, for testing, that’s not ideal.

+
+
+

Documentation

+

The principal piece of documentation for all new debuggers is a description of the launchers. Right now, the TraceRmiLauncherServicePlugin.html file in Debug/Debugger-rmi-trace contains all of this information. Detail to note: the #@help locations in the launchers themselves ought to match the HTML tags in the file, as should the launcher names.

+
+
+

Extended features

+

Once everything else is done, it may be worth considering additional functionality specific to the debugger. This can be made available in either commands.py or methods.py. For drgn, we’ve added attach methods that allow the user to attach to additional programs.

+
+
+
+ + diff --git a/GhidraDocs/GhidraClass/Debugger/B5-AddingDebuggers.md b/GhidraDocs/GhidraClass/Debugger/B5-AddingDebuggers.md new file mode 100644 index 00000000000..4208700a368 --- /dev/null +++ b/GhidraDocs/GhidraClass/Debugger/B5-AddingDebuggers.md @@ -0,0 +1,224 @@ + +# Adding a debugger + +This module walks you through an example of how to add a debugger agent to Ghidra. +It has no exercises and is certainly not the only way to implement an agent, but hopefully contains some useful pointers and highlights some pit-falls that you might encounter. +The example traces the implementation of an actual agent — the agent for *Meta*'s **drgn** debugger, which provides a scriptable, albeit read-only, interface to the running Linux kernel, as well as user-mode and core-dump targets. + +## Debugger documentation + +- Recommended reading: **drgn** () +- Also: **drgn (docs)** () + +## Anatomy of a Ghidra debugger agent + +To support debugging on various platforms, the Ghidra debugger has *agents*, i.e. clients capable of receiving information from a native debugger and passing it to the Ghidra GUI. +They include the **dbgeng** agent that supports Windows debuggers, the **gdb** agent for gdb on a variety of platforms, the **lldb** agent for macOS and Linux, and the **jpda** agent for Java. +All but the last are written in Python 3, and all communicate with the GUI via a protobuf-based protocol described in [Debugger-rmi-trace](../../../Ghidra/Debug/Debugger-rmi-trace/src/main/proto/trace-rmi.proto). + +At the highest level, each agent has four elements (ok, a somewhat arbitrary division, but...): + +* [`debugger-launchers`](../../../Ghidra/Debug/Debugger-agent-drgn/data/debugger-launchers) – A set of launchers, often a mixture of `.bat`, `.sh`, and sometimes `.py` scripts +* [`schema.xml`](../../../Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn/schema.xml) – An object-model schema. (While expressed in XML, this is not an "XML schema".) 
+* [`src/ghidradrgn`](../../../Ghidra/Debug/Debugger-agent-drgn/src/main/py/src/ghidradrgn) – Python files for architecture, commands, hooks, methods, and common utility functions +* [`build.gradle`](../../../Ghidra/Debug/Debugger-agent-drgn/build.gradle) – Build logic + +Large portions of each are identical or similar across agents, so, as a general strategy, copying an existing agent and renaming all agent-specific variables, methods, etc. is not the worst plan of action. Typically, this leads to large chunks of detritus that need to be edited out late in the development process. + +## drgn as an Example + +### The first launcher — `local-drgn.sh` + +The initial objective is to create a shell that sets up the environment variables for parameters we'll need and invokes the target. +For this project, I originally started duplicating the **lldb** agent and then switched to the **dbgeng** agent. +Why? The hardest part of writing an agent is getting the initial launch pattern correct. +**drgn** is itself written in Python. +While gdb and lldb support Python as scripting languages, their cores are not Python-based. +For these debuggers, the launcher runs the native debugger and instructs it to load our plugin, which is the agent. +The dbgeng agent inverts this pattern, i.e. the agent is a Python application that uses the **Pybag** package to access the native *kd* interface over COM. +**drgn** follows this pattern. + +That said, a quick look at the launchers in the **dbgeng** project (under [`debugger-launchers`](../../../Ghidra/Debug/Debugger-agent-dbgeng/data/debugger-launchers)) shows `.bat` files, each of which calls a `.py` file in [`data/support`](../../../Ghidra/Debug/Debugger-agent-dbgeng/data/support). +As **drgn** is a Linux-only debugger, we need to convert the `.bat` examples to `.sh`. +Luckily, the conversion is pretty simple: most line annotations use `#` in place of `::` and environment variables are referenced using `$VAR` in place of `%VAR%`. 
+ +The syntax of the `.sh` is typical of any *\*nix* shell. +In addition to the shell script, a launcher includes a metadata header to populate its menu and options dialog. +Annotations include: + +* A `#!` line for the shell invocation +* The Ghidra license +* A `#@title` line for the launcher name +* A `#@desc`-annotated HTML description, as displayed in the launch dialog +* `#@menu-group` for organizing launchers +* `#@icon` for an icon +* `#@help` the help file and anchor +* Some number of `#@arg` variables, usually only one to name the executable image +* `#@args` specifies the remainder of the arguments, passed to a user-mode target if applicable +* Some number of `#@env` variables referenced by the Python code + +While the **drgn** launcher does not use `@arg` or `@args`, there are plentiful examples +in the [**gdb** project](../../../Ghidra/Debug/Debugger-agent-gdb/data/debugger-launchers). +The `#@env` lines are composed of the variable name (usually in caps), its type, default value, a label for the dialog if the user needs to be queried, and a description. +The syntax looks like: + +* `#@env` *Name* `:` *Type* [ `!` ] `=` *DefaultValue* *Label* *Description* + +where `!`, if present, indicates the option is required. + +For **drgn**, invoking the `drgn` command directly saves us a lot of the work involved in getting the environment correct. +We pass it our Python launcher `local-drgn.py` instead of allowing it to call `run_interactive`, which does not return. +Instead, we create an instance of `prog` based on the parameters, complete the Ghidra-specific initialization, and call `run_interactive(prog)` ourselves. + +The Python script needs to do the setup work for Ghidra and for **drgn**. +A good start is to try to implement a script that calls the methods for `connect`, `create`, and `start`, with `create` doing as little as possible initially. +This should allow you to work the kinks out of `arch.py` and `util.py`. 
+ +For this particular target, there are some interesting wrinkles surrounding the use of `sudo` (required for most targets) which complicate where wheels are installed (i.e. it is pretty easy to accidentally mix user-local and system `site-packages`). +Additionally, the `-E` parameter is required to ensure that the environment variables we defined get passed to the root environment. +In the cases where we use `sudo`, the first message printed in the interactive shell will be the request for the user's password. + +### The schema + +The schema, specified in `schema.xml`, provides a basic structure for Ghidra's **Model** View and allows Ghidra to identify and locate various interfaces that are used to populate the GUI. +For example, the *Memory* interface identifies the container for items with the interface *MemoryRegion*, which provide information used to fill the **Memory** View. +Among the important interfaces are *Process*, *Thread*, *Frame*, *Register*, *MemoryRegion*, *Module*, and *Section*. +These interfaces are "built into" Ghidra so that it can identify which objects provide specific information and commands. + +For the purposes of getting started, it's easiest to clone the **dbgeng** schema and modify it as needed. +Again, this will require substantial cleanup later on, but, as schema errors are frequently subtle and hard to identify, revisiting is probably the better approach. +`MANIFEST.in` should be modified to reflect the schema's path. + +### The build logic + +Similarly, `build.gradle` can essentially be cloned from **dbgeng**, with the appropriate change to `eclipse.project.name`. +For the most part, you need only apply the `distributableGhidraModule.gradle` and `hasPythonPackage.gradle` scripts. +If further customization is needed, consult other examples in the Ghidra project and Gradle's documentation. 
+ +Not perhaps directly a build logic item, but `pyproject.toml` should be modified to reflect the agent's version number (by convention, Ghidra's version number). + +### The Python files + +At this point, we can start actually implementing the **drgn** agent. +`arch.py` is usually a good starting point, as much of the initial logic depends on it. +For `arch.py`, the hard bit is knowing what maps to what. +The `language_map` converts the debugger's self-reported architecture to Ghidra's language set. +Ghidra's languages are mapped to a set of language-to-compiler maps, which are then used to map the debugger's self-reported language to Ghidra's compiler. +Certain combinations are not allowed because Ghidra has no concept of that language-compiler combination. +For example, x86 languages never map to `default`. +Hence, the need for a `x86_compiler_map`, which defaults to something else (in this case, `gcc`). + +After `arch.py`, a first pass at `util.py` is probably warranted. +In particular, the version info is used early in the startup process. +A lot of this code is not relevant to our current project, but at a minimum we want to implement (or fake out) methods such as `selected_process`, `selected_thread`, and `selected_frame`. +In this example, there probably won't be more than one session or one process. +Ultimately, we'll have to decide whether we even want *Session* in the schema. +For now, we're defaulting session and process to 0, and thread to 1, as 0 is invalid for debugging the kernel. +(Later, it becomes obvious that the attached pid and `prog.main_thread().tid` make sense for user-mode debugging, and `prog.crashed_thread().tid` makes sense for crash dump debugging.) + +With `arch.py` and `util.py` good to a first approximation, we would normally start implementing `put` methods in `commands.py` for various objects in the **Model** View, starting at the root of the tree and descending through the children. 
+Again, *Session* and *Process* are rather poorly-defined, so we skip them (leaving one each) and tackle *Threads*. +Typically, for each iterator in the debugger API, two commands get implemented — one internal method that does the actual work, e.g. `put_threads()` and one invokable method that wraps this method in a (potentially batched) transaction, e.g. `ghidra_trace_put_threads()`. +The internal methods are meant to be called by other Python code, with the caller assumed to be responsible for setting up the transaction. +The `ghidra_trace`-prefixed methods are meant to be part of the custom CLI command set which the user can invoke and therefore should set up the transaction. +The internal method typically creates the path to the container using patterns for the container, individual keys, and the combination, e.g. `THREADS_PATTERN`, `THREAD_KEY_PATTERN`, and `THREAD_PATTERN`. +Patterns are built up from other patterns, going back to the root. +A trace object corresponding to the debugger object is created from the path and inserted into the trace database. + +Once this code has been tested, attributes of the object can be added to the base object using `set_value`. +Attributes that are not primitives can be added using the pattern create-populate-insert, i.e. we call `create_object` with extensions to the path, populate the object's children, and call `insert` with the created object. +In many cases (particularly when populating an object's children is expensive), you may want to defer the populate step, effectively creating a placeholder that can be populated on-demand. +The downside of this approach, of course, is that *refresh* methods must be added to populate those nodes. + +As an aside, it's probably worth noting the function of `create_object` and `insert`. +Objects in the trace are maintained in a directory tree, with links (and backlinks) allowed, whose visible manifestation is the **Model** View. 
+As such, operations on the tree follow the normal procedure for operations on a graph. +`create_object` creates a node but not any edges, not even the implied ("canonical") edge from parent to child. +`insert` creates the canonical edge. +Until that edge exists, the object is not considered to be "alive", so the lifespan of the edge effectively encodes the object's life. +Following the create-populate-insert pattern minimizes the number of events that need to be processed. + +Having completed a single command, we can proceed in one of two directions — we can continue implementing commands for other objects in the tree, or we can implement matching *refresh* methods in `methods.py` for the completed object. +`methods.py` also requires patterns which are used to match a path to a trace object, usually via `find_x_by_pattern` methods. +The `refresh` methods may or may not rely on the `find_by` methods depending on whether the matching command needs parameters. +For example, we may want to assume the `selected_thread` matches the current object in the view, in which case it can be used to locate that node, or we may want to force the method to match on the node if the trace object can be easily matched to the debugger object, or we may want to use the node to set `selected_thread`. + +The concept of focus in the debugger is fairly complicated and a frequent source of confusion. +In general, we use *selected* to represent the GUI's current focus, typically the node in the **Model** or associated views which the user has selected. +In some sense, it represents the process, thread, or frame the user is interested in. +It also may differ from the *highlighted* node, chosen by a single-click (versus a double-click which sets the *selection*). +By contrast, the native debugger has its own idea of focus, which we usually describe as *current*. +(This concept is itself complicated by distinctions between the *event* object, e.g. 
which thread the debugger broke on, and the *current* object, e.g. which thread is being inspected.) +*Current* values are pushed "up" to Ghidra's GUI from the native debugger; *selected* values are pushed "down" to the native debugger from Ghidra. +To the extent possible, it makes sense to synchronize these values. +In other words, in most cases, a new *selection* should force a change in the set of *current* objects, and an event signaling a change in the *current* object should alter the GUI's set of *selected* objects. +(Of course, care needs to be taken not to make this a round-trip cycle.) + +`refresh` methods (and others) are often annotated in several ways. +The `@REGISTRY.method` annotation makes the method available to the GUI. +It specifies the `action` to be taken and the `display` that appears in the GUI pop-up menu. +*Actions* may be purely descriptive or may correspond to built-in actions taken by the GUI, e.g. `refresh` and many of the control methods, such as `step_into`. +Parameters for the methods may be annotated with `sch.Schema` (conventionally on the first parameter) to indicate the nodes to which the method applies, and with `ParamDesc` to describe the parameter's type and label for pop-up dialogs. +After retrieving necessary parameters, `refresh` methods invoke methods from `commands.py` wrapped in a transaction. + +For **drgn**, we implemented `put`/`refresh` methods for threads, frames, registers (`putreg`), and local variables, then modules and sections, memory and regions, the environment, and finally processes. +We also implemented `putmem` using **drgn**'s `read` API. +*Symbols* was another possibility, but, for the moment, populating symbols seemed too expensive. +Instead, `retrieve_symbols` was added to allow per-pattern symbols to be added. +Unfortunately, the **drgn** API doesn't support wildcards, so eventually some other strategy will be necessary. 
+ +The remaining set of Python functions, `hooks.py`, comprises callbacks for various events sent by the native debugger. +The current **drgn** code has no event system. +A set of skeletal methods has been left in place as (a) we can use the single-step button as a stand-in for "update state", and (b) some discussion exists in the **drgn** user forums regarding eventually implementing more control functionality. +For anyone implementing `hooks.py`, the challenging logic resides in the event loop, particularly if there is a need to move back-and-forth between the debugger and a *repl*. +Also, distinctions need to be made between control commands, which wait for events, and commands which rely on a callback but complete immediately. +As a rule-of-thumb, we *push* to Ghidra, i.e. Ghidra issues requests asynchronously and the agent must update the trace database. + +### Revisiting the schema + +At this point, revisiting and editing the schema may be called for. +For example, for **drgn**, it's not obvious that there can ever be more than one session, so it may be cleaner to embed *Processes* at the root. +This, in turn, requires editing the `commands.py` and `methods.py` patterns. +Similarly, as breakpoints are not supported, the breakpoint-related entries may safely be deleted. + +In general, the schema can be structured however you like, but there are several details worth mentioning. +Interfaces generally need to be respected for various functions in the GUI to work. +Process, thread, frame, module, section, and memory elements can be named arbitrarily, but their interfaces must be named correctly. +Additionally, the logic for finding objects in the tree is quite complicated. +If elements need be traversed as part of the default search process, their containers must be tagged `canonical`. +If attributes need to be traversed, their parents should have the interface `Aggregate`. 
+ +Each entry may have `elements` of the same type ordered by keys, and `attributes` of arbitrary type. +The `element` entry describes the schema for all elements; the schema for attributes may be given explicitly using named `attribute` entries or defaulted using the unnamed `attribute` entry, typically `<attribute schema="VOID">` or `<attribute schema="ANY">`. +The schema for any element in the **Model** View is visible using the hover, which helps substantially when trying to identify schema traversal errors. + +Schema entries may be marked `hidden=yes` with the obvious result. +Additionally, certain attribute names and schema have special properties. +For example, `_display` defines the visible ID for an entry in the **Model** tree, and `ADDRESS` and `RANGE` mark attributes which are navigable. + + +### Unit tests + +The hardest part of writing unit tests is almost always getting the first test to run, and the easiest unit tests, as with the Python files, are those for `commands.py`. +For **drgn**, as before, we're using **dbgeng** as the pattern, but several elements had to be changed. +Because the launchers execute a script, we need to amend the `runThrowError` logic (and, more specifically, the `execInPython` logic) in [`AbstractDrgnTraceRmiTest`](../../../Ghidra/Test/DebuggerIntegrationTest/src/test.slow/java/agent/drgn/rmi/AbstractDrgnTraceRmiTest.java) with a `ProcessBuilder` call that takes a script, rather than writing the script to stdin. +While there, we can also trim out the unnecessary helper logic around items like breakpoints, watchpoints, etc. from all of the test classes. + +JUnits for `methods.py` follow a similar pattern, but, again, getting the first one to run is often the most difficult. +For **drgn**, we've had to override the timeouts in `waitForPass` and `waitForCondition`. +After starting with hardcoded paths for the test target, we also had to add logic to re-write the `PREAMBLE` on-the-fly in `execInDrgn`. 
+Obviously, with no real `hooks.py` logic, there's no need for `DrgnHooksTest`. + +Of note, we've used the gdb `gcore` command to create a core dump for the tests. +Both user- and kernel-mode require privileges to run the debugger, and, for testing, that's not ideal. + +### Documentation + +The principal piece of documentation for all new debuggers is a description of the launchers. +Right now, the [`TraceRmiLauncherServicePlugin.html`](../../../Ghidra/Debug/Debugger-rmi-trace/src/main/help/help/topics/TraceRmiConnectionManagerPlugin/TraceRmiLauncherServicePlugin.html) file in `Debug/Debugger-rmi-trace` contains all of this information. +Detail to note: the `#@help` locations in the launchers themselves ought to match the HTML tags in the file, as should the launcher names. + +### Extended features + +Once everything else is done, it may be worth considering additional functionality specific to the debugger. This can be made available in either `commands.py` or `methods.py`. +For **drgn**, we've added `attach` methods that allow the user to attach to additional programs. 
\ No newline at end of file diff --git a/GhidraDocs/GhidraClass/Debugger/Makefile b/GhidraDocs/GhidraClass/Debugger/Makefile index 1946d49ece3..ff0a55419ae 100644 --- a/GhidraDocs/GhidraClass/Debugger/Makefile +++ b/GhidraDocs/GhidraClass/Debugger/Makefile @@ -17,6 +17,7 @@ all: \ B2-Emulation.html \ B3-Scripting.html \ B4-Modeling.html \ + B5-AddingDebuggers.html \ README.html clean: diff --git a/GhidraDocs/GhidraClass/Debugger/navhead.htm b/GhidraDocs/GhidraClass/Debugger/navhead.htm index 73c401f17d6..830899989de 100644 --- a/GhidraDocs/GhidraClass/Debugger/navhead.htm +++ b/GhidraDocs/GhidraClass/Debugger/navhead.htm @@ -8,5 +8,6 @@ class="advanced" href="B1-RemoteTargets.html">Remote TargetsEmulationScriptingModeling + class="advanced" href="B4-Modeling.html">ModelingAdding Debuggers diff --git a/GhidraDocs/certification.manifest b/GhidraDocs/certification.manifest index 85826860c5c..12ea58c5264 100644 --- a/GhidraDocs/certification.manifest +++ b/GhidraDocs/certification.manifest @@ -74,6 +74,8 @@ GhidraClass/Debugger/B3-Scripting.html||GHIDRA||||END| GhidraClass/Debugger/B3-Scripting.md||GHIDRA||||END| GhidraClass/Debugger/B4-Modeling.html||GHIDRA||||END| GhidraClass/Debugger/B4-Modeling.md||GHIDRA||||END| +GhidraClass/Debugger/B5-AddingDebuggers.html||GHIDRA||||END| +GhidraClass/Debugger/B5-AddingDebuggers.md||GHIDRA||||END| GhidraClass/Debugger/Makefile||GHIDRA||||END| GhidraClass/Debugger/README.html||GHIDRA||||END| GhidraClass/Debugger/README.md||GHIDRA||||END|