mirror of
https://github.com/bytecodealliance/wasm-micro-runtime.git
synced 2024-11-26 15:32:05 +00:00
fef26ead3e
And update the debug-tools sample.
414 lines
13 KiB
Python
414 lines
13 KiB
Python
#!/usr/bin/env python3
|
|
#
|
|
# Copyright (C) 2019 Intel Corporation. All rights reserved.
|
|
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
#
|
|
import argparse
|
|
import os
|
|
from pathlib import Path
|
|
import re
|
|
import shlex
|
|
import subprocess
|
|
import sys
|
|
|
|
"""
|
|
This is a tool to convert addresses, which are from a call-stack dump generated by iwasm, into line info for a wasm file.
|
|
|
|
When a wasm file is compiled with debug info, it is possible to transfer the address to line info.
|
|
|
|
For example, there is a call-stack dump:
|
|
|
|
```
|
|
#00: 0x0a04 - $f18
|
|
#01: 0x08e4 - $f11
|
|
#02: 0x096f - $f12
|
|
#03: 0x01aa - _start
|
|
```
|
|
|
|
- store the call-stack dump into a file, e.g. call_stack.txt
|
|
- run the following command to convert the address into line info:
|
|
```
|
|
$ cd test-tools/addr2line
|
|
$ python3 addr2line.py --wasi-sdk <wasi-sdk installation> --wabt <wabt installation> --wasm-file <wasm file path> call_stack.txt
|
|
```
|
|
The script will use *wasm-objdump* in wabt to translate the addresses, then use *llvm-dwarfdump* to look up the line info for each address
|
|
in the call-stack dump.
|
|
- if addresses are not available in the stack trace (i.e. iwasm <= 1.3.2) or iwasm is used in fast interpreter mode,
|
|
run the following command to convert the function index into line info (passing the `--no-addr` option):
|
|
```
|
|
$ python3 addr2line.py --wasi-sdk <wasi-sdk installation> --wabt <wabt installation> --wasm-file <wasm file path> call_stack.txt --no-addr
|
|
```
|
|
The script will use *wasm-objdump* in wabt to get the function names corresponding to function indexes, then use *llvm-dwarfdump* to look up the line info for each
|
|
function index in the call-stack dump.
|
|
"""
|
|
|
|
|
|
def locate_sourceMappingURL_section(wasm_objdump: Path, wasm_file: Path) -> bool:
    """
    Figure out if the wasm file has a sourceMappingURL section.

    Runs `<wasm_objdump> -h <wasm_file>` and reports whether any line of the
    captured output mentions "sourceMappingURL".

    wasm_objdump: path to the wasm-objdump executable
    wasm_file:    path to the wasm file to inspect

    Returns True if "sourceMappingURL" shows up in the output, False otherwise.
    Raises subprocess.CalledProcessError if the tool exits with a non-zero status.
    """
    # Hand the arguments over as a list instead of formatting a command string
    # and splitting it again: a path containing whitespace or quotes would
    # otherwise be torn into several arguments.
    p = subprocess.run(
        [str(wasm_objdump), "-h", str(wasm_file)],
        check=True,
        capture_output=True,
        text=True,
    )

    # In text mode the captured output already uses normalized newlines, so
    # splitlines() is the portable way to iterate (os.linesep is "\r\n" on
    # Windows and would not split anything).
    return any("sourceMappingURL" in line for line in p.stdout.splitlines())
|
|
|
|
|
|
def get_code_section_start(wasm_objdump: Path, wasm_file: Path) -> int:
    """
    Find the start offset of the Code section in a wasm file.

    Runs `<wasm_objdump> -h <wasm_file>` and looks for the Code section header.
    If the header looks like:

        Code start=0x0000017c end=0x00004382 (size=0x00004206) count: 47

    the start offset is 0x0000017c.

    wasm_objdump: path to the wasm-objdump executable
    wasm_file:    path to the wasm file to inspect

    Returns the start offset as an int, or -1 if no Code section header is found.
    Raises subprocess.CalledProcessError if the tool exits with a non-zero status.
    """
    # Pass arguments as a list so paths with whitespace survive intact.
    p = subprocess.run(
        [str(wasm_objdump), "-h", str(wasm_file)],
        check=True,
        capture_output=True,
        text=True,
    )

    # Only accept a line that *starts* with the "Code start=<hex>" header.
    # A plain substring test for "Code" also fires on unrelated lines (for
    # instance one that echoes a file name containing "Code") and then fails
    # while picking the value apart.
    header = re.compile(r"\s*Code\s+start=((?:0x)?[0-9a-fA-F]+)")
    for line in p.stdout.splitlines():
        m = header.match(line)
        if m:
            return int(m.group(1), 16)

    return -1
|
|
|
|
|
|
def get_line_info_from_function_addr_dwarf(
    dwarf_dump: Path, wasm_file: Path, offset: int
) -> tuple[str, str, str, str]:
    """
    Find the location info of a given offset in a wasm file.

    Runs `<dwarf_dump> --lookup=<offset> <wasm_file>` and scans the output for
    the DW_AT_name, DW_AT_decl_file and "Line info" lines.

    dwarf_dump: path to the llvm-dwarfdump executable
    wasm_file:  path to the wasm file carrying the debug info
    offset:     offset to look up

    Returns (function name, file, line, column). Anything that is not found
    keeps its placeholder: "<unknown>" for the name, "unknown" for the file and
    "?" for line and column. When a "Line info" line is present, line and
    column are whatever parse_line_info() yields for it.

    The tool's exit status is deliberately not checked (best effort).
    """
    # Arguments go in as a list so that paths with whitespace stay in one piece.
    p = subprocess.run(
        [str(dwarf_dump), f"--lookup={offset}", str(wasm_file)],
        check=False,
        capture_output=True,
        text=True,
    )

    function_name, function_file = "<unknown>", "unknown"
    function_line, function_column = "?", "?"

    # Text-mode output has normalized newlines; splitlines() is portable
    # whereas splitting on os.linesep is not.
    for line in p.stdout.splitlines():
        line = line.strip()

        if "DW_AT_name" in line:
            function_name = get_dwarf_tag_value("DW_AT_name", line)

        if "DW_AT_decl_file" in line:
            function_file = get_dwarf_tag_value("DW_AT_decl_file", line)

        if "Line info" in line:
            # The file reported by the line table is ignored here; the
            # declaration file from DW_AT_decl_file is what gets returned.
            _, function_line, function_column = parse_line_info(line)

    return (function_name, function_file, function_line, function_column)
|
|
|
|
|
|
def get_dwarf_tag_value(tag: str, line: str) -> str:
    """
    Extract the value of a DWARF attribute from one (stripped) dump line.

    The line is expected to start with the tag followed by the value in
    parentheses, for example:

        DW_AT_name ("main")     -> main
        DW_AT_decl_line (42)    -> 42

    tag:  attribute name, e.g. "DW_AT_name"
    line: the line to parse; it must begin with the tag

    Returns the value as a string. Quoted values are returned without the
    quotes; any other parenthesized value is returned as written.
    Raises ValueError if the line does not have the "<tag> (<value>)" shape.
    """
    tag_re = re.escape(tag)

    # Try extracting value as string
    m = re.match(rf"{tag_re}\s+\(\"(.*)\"\)", line)
    if m:
        return m.group(1)

    # Try extracting value as integer
    m = re.match(rf"{tag_re}\s+\((\d+)\)", line)
    if m:
        return m.group(1)

    # Neither a quoted string nor a decimal number (e.g. a hex value): hand
    # back the raw text rather than dereferencing a failed match.
    m = re.match(rf"{tag_re}\s+\((.*)\)", line)
    if m:
        return m.group(1)

    raise ValueError(f"cannot extract the value of {tag} from {line!r}")
|
|
|
|
|
|
def get_line_info_from_function_name_dwarf(
    dwarf_dump: Path, wasm_file: Path, function_name: str
) -> tuple[str, str, str]:
    """
    Find the location info of a given function in a wasm file.

    Runs `<dwarf_dump> --name=<function_name> <wasm_file>` and scans the output
    for the DW_AT_name, DW_AT_decl_file and DW_AT_decl_line lines.

    dwarf_dump:    path to the llvm-dwarfdump executable
    wasm_file:     path to the wasm file carrying the debug info
    function_name: name to look up

    Returns (function name, file, line) as reported by the tool. Anything that
    is not found keeps its placeholder: "<unknown>" for the name, "unknown" for
    the file and "?" for the line.

    The tool's exit status is deliberately not checked (best effort).
    """
    # A list keeps the looked-up name and the paths as single arguments even
    # when they contain whitespace or shell metacharacters.
    p = subprocess.run(
        [str(dwarf_dump), f"--name={function_name}", str(wasm_file)],
        check=False,
        capture_output=True,
        text=True,
    )

    # The result starts from placeholders; the requested name is not echoed
    # back unless the tool reports it.
    found_name, found_file = "<unknown>", "unknown"
    found_line = "?"

    # Text-mode output has normalized newlines; splitlines() is portable.
    for line in p.stdout.splitlines():
        line = line.strip()

        if "DW_AT_name" in line:
            found_name = get_dwarf_tag_value("DW_AT_name", line)

        if "DW_AT_decl_file" in line:
            found_file = get_dwarf_tag_value("DW_AT_decl_file", line)

        if "DW_AT_decl_line" in line:
            found_line = get_dwarf_tag_value("DW_AT_decl_line", line)

    return (found_name, found_file, found_line)
|
|
|
|
|
|
def get_line_info_from_function_addr_sourcemapping(
    emsymbolizer: Path, wasm_file: Path, offset: int
) -> tuple[str, str, str, str]:
    """
    Find the location info of a given offset in a wasm file which is compiled with emcc.

    Runs `<emsymbolizer> -t code -f <wasm_file>.map <wasm_file> <offset>`.

    There usually are two lines in the output:
        ??
        relative path to source file:line:column

    emsymbolizer: path to the emsymbolizer executable
    wasm_file:    path to the wasm file; the source map is expected next to it,
                  named like the wasm file with ".map" appended
    offset:       offset to look up

    Returns (function name, file, line, column), all strings. Anything that is
    not found keeps its placeholder: "<unknown>" for the name, "unknown" for
    the file and "?" for line and column.

    The tool's exit status is deliberately not checked (best effort).
    """
    debug_info_source = wasm_file.with_name(f"{wasm_file.name}.map")

    # Arguments go in as a list so that paths with whitespace stay in one piece.
    p = subprocess.run(
        [
            str(emsymbolizer),
            "-t",
            "code",
            "-f",
            str(debug_info_source),
            str(wasm_file),
            str(offset),
        ],
        check=False,
        capture_output=True,
        text=True,
        cwd=Path.cwd(),
    )

    function_name, function_file = "<unknown>", "unknown"
    function_line, function_column = "?", "?"

    # Raw string: "\d" in a normal string literal is an invalid escape sequence.
    location = re.compile(r"(.*):(\d+):(\d+)")

    # Text-mode output has normalized newlines; splitlines() is portable.
    for line in p.stdout.splitlines():
        line = line.strip()
        if not line:
            continue

        m = location.match(line)
        if m:
            function_file, function_line, function_column = m.groups()
            continue

        # it's always ??, not sure about that
        if "??" != line:
            function_name = line

    return (function_name, function_file, function_line, function_column)
|
|
|
|
|
|
def parse_line_info(line_info: str) -> tuple[str, int, int]:
    """
    line_info -> (file, line, column)

    Parses a line of the form

        Line info: file '<file>', line <line>, column <column>

    (anything before or after that fragment is ignored).

    Returns the file as a string and the line and column as ints.
    Raises ValueError if the fragment is not present in line_info.
    """
    PATTERN = r"Line info: file \'(.+)\', line ([0-9]+), column ([0-9]+)"
    m = re.search(PATTERN, line_info)
    # Explicit check instead of `assert`: assertions disappear under
    # `python -O`, which would turn bad input into an obscure AttributeError.
    if m is None:
        raise ValueError(f"unrecognized line info: {line_info!r}")

    file, line, column = m.groups()
    return (file, int(line), int(column))
|
|
|
|
|
|
def parse_call_stack_line(line: str) -> tuple[str, str, str]:
    """
    Split one call-stack frame into (frame number, address, symbol).

    The address is captured without its "0x" prefix and is None when the frame
    carries no address. A line that is not a frame at all yields None.

    Frame with address (WAMR > 1.3.2):
        #00: 0x0a04 - $f18      => ("00", "0a04", "$f18")
    Frame without address (older WAMR):
        #00 $f18                => ("00", None, "$f18")
    Frame with a symbol from the name section
    (-DWAMR_BUILD_LOAD_CUSTOM_SECTION=1 -DWAMR_BUILD_CUSTOM_NAME_SECTION=1):
        #02: 0x0200 - a         => ("02", "0200", "a")
    _start (always named):
        #05: 0x011f - _start    => ("05", "011f", "_start")
    """
    # Frames that carry an address: "#NN: 0xADDR - SYMBOL"
    with_address = re.match(r"#([0-9]+): 0x([0-9a-f]+) - (\S+)", line)
    if with_address:
        return with_address.groups()

    # Frames without an address: "#NN SYMBOL"
    without_address = re.match(r"#([0-9]+) (\S+)", line)
    if not without_address:
        return None

    frame, symbol = without_address.groups()
    return (frame, None, symbol)
|
|
|
|
|
|
def parse_module_functions(wasm_objdump: Path, wasm_file: Path) -> dict[str, str]:
    """
    Build a map from function index to function name for a wasm file.

    Runs `<wasm_objdump> -x <wasm_file> --section=function` and collects every
    output line that contains `func[<index>]` together with a `<name>`.

    wasm_objdump: path to the wasm-objdump executable
    wasm_file:    path to the wasm file to inspect

    Returns a dict whose keys are the indexes as decimal strings (e.g. "18")
    and whose values are the names found between the angle brackets.
    Raises subprocess.CalledProcessError if the tool exits with a non-zero status.
    """
    # Arguments go in as a list so that paths with whitespace stay in one piece.
    p = subprocess.run(
        [str(wasm_objdump), "-x", str(wasm_file), "--section=function"],
        check=True,
        capture_output=True,
        text=True,
    )

    entry = re.compile(r".*func\[([0-9]+)\].*<(.*)>")
    function_index_to_name = {}

    # Text-mode output has normalized newlines; splitlines() is portable.
    for line in p.stdout.splitlines():
        if "func[" not in line:
            continue

        m = entry.match(line)
        if m is None:
            # A func[...] entry without a <name> has nothing to contribute;
            # skip it instead of aborting the whole run.
            continue

        index, name = m.groups()
        function_index_to_name[index] = name

    return function_index_to_name
|
|
|
|
|
|
def demangle(cxxfilt: Path, function_name: str) -> str:
    """
    Run a symbol name through the demangler and return what it prints.

    Runs `<cxxfilt> -n <function_name>`.

    cxxfilt:       path to the demangler executable (main() passes llvm-cxxfilt)
    function_name: the symbol name to demangle

    Returns the tool's standard output with surrounding whitespace removed.
    Raises subprocess.CalledProcessError if the tool exits with a non-zero status.
    """
    # The name is passed as one list element: formatting it into a command
    # string and re-splitting would break names that contain whitespace or
    # quote characters.
    p = subprocess.run(
        [str(cxxfilt), "-n", function_name],
        check=True,
        capture_output=True,
        text=True,
    )
    return p.stdout.strip()
|
|
|
|
|
|
def main():
    """
    Command-line entry point: symbolize every frame of a call-stack dump.

    Steps:
      1. parse the command line and locate wasm-objdump (under --wabt) and
         llvm-dwarfdump / llvm-cxxfilt (under --wasi-sdk);
      2. if the wasm file has a sourceMappingURL section, it is treated as an
         emcc build and emsymbolizer (under --emsdk) is used for address
         lookups instead of llvm-dwarfdump;
      3. read the call stack file line by line and print, for every frame, the
         demangled function name followed by "\\tat file:line[:column]".
         Lines that are not frames are echoed unchanged.

    With --no-addr the function index ($fNN) of each frame is resolved by name;
    otherwise the frame address, made relative to the Code section start, is
    looked up.

    Returns 0 on success and -1 when a required tool or input is missing.
    Problems are reported on stderr, except the --emsdk hint which goes to stdout.
    """
    parser = argparse.ArgumentParser(description="addr2line for wasm")
    # These three options are used unconditionally below, so let argparse
    # reject a command line that omits them instead of failing later on None.
    parser.add_argument(
        "--wasi-sdk", type=Path, required=True, help="path to wasi-sdk"
    )
    parser.add_argument("--wabt", type=Path, required=True, help="path to wabt")
    parser.add_argument(
        "--wasm-file", type=Path, required=True, help="path to wasm file"
    )
    parser.add_argument("call_stack_file", type=Path, help="path to a call stack file")
    parser.add_argument(
        "--no-addr",
        action="store_true",
        help="use call stack without addresses or from fast interpreter mode",
    )
    parser.add_argument("--emsdk", type=Path, help="path to emsdk")
    args = parser.parse_args()

    wasm_objdump = args.wabt.joinpath("bin/wasm-objdump")
    llvm_dwarf_dump = args.wasi_sdk.joinpath("bin/llvm-dwarfdump")
    llvm_cxxfilt = args.wasi_sdk.joinpath("bin/llvm-cxxfilt")

    # Explicit checks rather than `assert`, which vanishes under `python -O`
    # and gives the user no hint about what is missing.
    for required_path in (
        wasm_objdump,
        llvm_dwarf_dump,
        llvm_cxxfilt,
        args.call_stack_file,
    ):
        if not required_path.exists():
            print(f"{required_path} does not exist", file=sys.stderr)
            return -1

    emsymbolizer = None
    emcc_production = locate_sourceMappingURL_section(wasm_objdump, args.wasm_file)
    if emcc_production:
        if args.emsdk is None:
            print("Please provide the path to emsdk via --emsdk")
            return -1

        emsymbolizer = args.emsdk.joinpath("upstream/emscripten/emsymbolizer")
        if not emsymbolizer.exists():
            print(f"{emsymbolizer} does not exist", file=sys.stderr)
            return -1

    code_section_start = get_code_section_start(wasm_objdump, args.wasm_file)
    if code_section_start == -1:
        print(f"no Code section found in {args.wasm_file}", file=sys.stderr)
        return -1

    function_index_to_name = parse_module_functions(wasm_objdump, args.wasm_file)

    hinted_no_addr = False
    with open(args.call_stack_file, "rt", encoding="ascii") as f:
        for i, line in enumerate(f):
            line = line.strip()
            if not line:
                continue

            splitted = parse_call_stack_line(line)
            if splitted is None:
                # Not a frame: echo it unchanged.
                print(f"{line}")
                continue

            _, offset, index = splitted
            if args.no_addr:
                # FIXME: w/ emcc production
                if not index.startswith("$f"):  # E.g. _start or Text format
                    print(f"{i}: {index}")
                    continue
                index = index[2:]

                if index not in function_index_to_name:
                    print(f"{i}: {line}")
                    continue

                if not emcc_production:
                    _, function_file, function_line = (
                        get_line_info_from_function_name_dwarf(
                            llvm_dwarf_dump,
                            args.wasm_file,
                            function_index_to_name[index],
                        )
                    )
                else:
                    function_file, function_line = "unknown", "?"

                function_name = demangle(llvm_cxxfilt, function_index_to_name[index])
                print(f"{i}: {function_name}")
                print(f"\tat {function_file}:{function_line}")
            else:
                if offset is None:
                    # The frame has no address, so there is nothing to look
                    # up; keep the frame visible and point at --no-addr once.
                    if not hinted_no_addr:
                        print(
                            "call stack has frames without addresses, "
                            "try --no-addr",
                            file=sys.stderr,
                        )
                        hinted_no_addr = True
                    print(f"{i}: {line}")
                    continue

                offset = int(offset, 16)
                # match the algorithm in wasm_interp_create_call_stack()
                # either a *offset* to *code* section start
                # or a *offset* in a file
                if offset <= code_section_start:
                    # Cannot be made relative to the Code section; report it
                    # and carry on with the remaining frames.
                    print(
                        f"address 0x{offset:x} is not inside the Code section",
                        file=sys.stderr,
                    )
                    print(f"{i}: {line}")
                    continue
                offset = offset - code_section_start

                if emcc_production:
                    function_name, function_file, function_line, function_column = (
                        get_line_info_from_function_addr_sourcemapping(
                            emsymbolizer, args.wasm_file, offset
                        )
                    )
                else:
                    function_name, function_file, function_line, function_column = (
                        get_line_info_from_function_addr_dwarf(
                            llvm_dwarf_dump, args.wasm_file, offset
                        )
                    )

                # if can't parse function_name, use name section or <index>
                if function_name == "<unknown>":
                    if index.startswith("$f"):
                        function_name = function_index_to_name.get(index[2:], index)
                    else:
                        function_name = index

                function_name = demangle(llvm_cxxfilt, function_name)

                print(f"{i}: {function_name}")
                print(f"\tat {function_file}:{function_line}:{function_column}")

    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the tool only when executed as a script; main()'s return value
    # becomes the process exit status.
    raise SystemExit(main())
|