mirror of
				https://github.com/bytecodealliance/wasm-micro-runtime.git
				synced 2025-10-26 10:51:17 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			414 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			414 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env python3
 | |
| #
 | |
| # Copyright (C) 2019 Intel Corporation.  All rights reserved.
 | |
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | |
| #
 | |
| import argparse
 | |
| import os
 | |
| from pathlib import Path
 | |
| import re
 | |
| import shlex
 | |
| import subprocess
 | |
| import sys
 | |
| 
 | |
| """
 | |
| This is a tool to convert addresses, which are from a call-stack dump generated by iwasm, into line info for a wasm file.
 | |
| 
 | |
| When a wasm file is compiled with debug info, it is possible to transfer the address to line info.
 | |
| 
 | |
| For example, there is a call-stack dump:
 | |
| 
 | |
| ```
 | |
| #00: 0x0a04 - $f18
 | |
| #01: 0x08e4 - $f11
 | |
| #02: 0x096f - $f12
 | |
| #03: 0x01aa - _start
 | |
| ```
 | |
| 
 | |
| - store the call-stack dump into a file, e.g. call_stack.txt
 | |
| - run the following command to convert the address into line info:
 | |
|   ```
 | |
|   $ cd test-tools/addr2line
 | |
|   $ python3 addr2line.py --wasi-sdk <wasi-sdk installation> --wabt <wabt installation> --wasm-file <wasm file path> call_stack.txt
 | |
|   ```
 | |
|   The script will use *wasm-objdump* in wabt to transform address, then use *llvm-dwarfdump* to lookup the line info for each address
 | |
|   in the call-stack dump.
 | |
| - if addresses are not available in the stack trace (i.e. iwasm <= 1.3.2) or iwasm is used in fast interpreter mode,
 | |
|   run the following command to convert the function index into line info (passing the `--no-addr` option):
 | |
|   ```
 | |
|   $ python3 addr2line.py --wasi-sdk <wasi-sdk installation> --wabt <wabt installation> --wasm-file <wasm file path> call_stack.txt --no-addr
 | |
|   ```
 | |
|   The script will use *wasm-objdump* in wabt to get the function names corresponding to function indexes, then use *llvm-dwarfdump* to lookup the line info for each
 | |
|   function index in the call-stack dump.
 | |
| """
 | |
| 
 | |
| 
 | |
def locate_sourceMappingURL_section(wasm_objdump: Path, wasm_file: Path) -> bool:
    """
    Figure out if the wasm file has a sourceMappingURL section.

    Runs ``<wasm_objdump> -h <wasm_file>`` and reports whether the printed
    section headers mention ``sourceMappingURL`` anywhere.

    Raises subprocess.CalledProcessError if the tool exits with a non-zero
    status.
    """
    # Hand the arguments over as a list instead of formatting a command string
    # and re-splitting it, so paths containing spaces, quotes or backslashes
    # reach the tool unchanged.
    p = subprocess.run(
        [str(wasm_objdump), "-h", str(wasm_file)],
        check=True,
        capture_output=True,
        text=True,
    )

    # The marker contains no newline, so a substring test on the whole output
    # is equivalent to testing it line by line.
    return "sourceMappingURL" in p.stdout
 | |
| 
 | |
| 
 | |
def get_code_section_start(wasm_objdump: Path, wasm_file: Path) -> int:
    """
    Find the start offset of Code section in a wasm file.

    If the code section header looks like:
      Code start=0x0000017c end=0x00004382 (size=0x00004206) count: 47

    the start offset is 0x0000017c.

    Returns the offset as an int, or -1 when no Code section header is found.
    Raises subprocess.CalledProcessError if the tool exits with a non-zero
    status.
    """
    # Argument list (not a re-split command string) keeps paths with spaces,
    # quotes or backslashes intact.
    p = subprocess.run(
        [str(wasm_objdump), "-h", str(wasm_file)],
        check=True,
        capture_output=True,
        text=True,
    )

    # Require the full "Code start=0x..." shape rather than any line that
    # merely contains "Code" (e.g. a header line echoing a file name such as
    # "my_Code.wasm"), which previously raised IndexError.
    for line in p.stdout.splitlines():
        m = re.search(r"\bCode\s+start=(0x[0-9a-fA-F]+)", line)
        if m:
            return int(m.group(1), 16)

    return -1
 | |
| 
 | |
| 
 | |
def get_line_info_from_function_addr_dwarf(
    dwarf_dump: Path, wasm_file: Path, offset: int
) -> tuple[str, str, str, str]:
    """
    Find the location info of a given offset in a wasm file.

    Runs ``<dwarf_dump> --lookup=<offset> <wasm_file>`` and scans its output
    for ``DW_AT_name``, ``DW_AT_decl_file`` and ``Line info`` lines. When a
    marker occurs several times, the last occurrence wins.

    Returns (function_name, function_file, function_line, function_column).
    Fields that are not found keep the placeholders "<unknown>", "unknown",
    "?" and "?". Line and column are whatever parse_line_info() yields
    (integers) when a "Line info" line is present.

    The tool's exit status is deliberately ignored (check=False): a failed
    lookup simply yields the placeholders.
    """
    # Argument list (not a re-split command string) keeps paths with spaces,
    # quotes or backslashes intact.
    p = subprocess.run(
        [str(dwarf_dump), f"--lookup={offset}", str(wasm_file)],
        check=False,
        capture_output=True,
        text=True,
    )

    function_name, function_file = "<unknown>", "unknown"
    function_line, function_column = "?", "?"

    # Text mode already normalizes newlines, so split on them directly rather
    # than on the platform-specific os.linesep.
    for line in p.stdout.splitlines():
        line = line.strip()

        if "DW_AT_name" in line:
            function_name = get_dwarf_tag_value("DW_AT_name", line)

        if "DW_AT_decl_file" in line:
            function_file = get_dwarf_tag_value("DW_AT_decl_file", line)

        if "Line info" in line:
            _, function_line, function_column = parse_line_info(line)

    return (function_name, function_file, function_line, function_column)
 | |
| 
 | |
| 
 | |
def get_dwarf_tag_value(tag: str, line: str) -> str:
    """
    Extract the value printed for a DWARF attribute on one dump line.

    Recognized shapes, tried in order (the line must start with *tag*):
      <tag> ("text")   -> text
      <tag> (123)      -> "123"
      <tag> (anything) -> the raw text between the outer parentheses

    Raises ValueError when the line carries no parenthesized value for *tag*
    (previously this surfaced as an AttributeError on None).
    """
    escaped_tag = re.escape(tag)

    # Try extracting value as string
    m = re.match(rf'{escaped_tag}\s+\("(.*)"\)', line)
    if m:
        return m.group(1)

    # Try extracting value as integer
    m = re.match(rf"{escaped_tag}\s+\((\d+)\)", line)
    if m:
        return m.group(1)

    # Fall back to whatever is inside the parentheses (e.g. a hex number)
    # instead of failing on an unexpected value format.
    m = re.match(rf"{escaped_tag}\s+\((.*)\)", line)
    if m:
        return m.group(1).strip()

    raise ValueError(f"cannot extract the value of {tag} from: {line!r}")
 | |
| 
 | |
| 
 | |
def get_line_info_from_function_name_dwarf(
    dwarf_dump: Path, wasm_file: Path, function_name: str
) -> tuple[str, str, str]:
    """
    Find the location info of a given function in a wasm file.

    Runs ``<dwarf_dump> --name=<function_name> <wasm_file>`` and scans its
    output for ``DW_AT_name``, ``DW_AT_decl_file`` and ``DW_AT_decl_line``
    lines. When a marker occurs several times, the last occurrence wins.

    Returns (function_name, function_file, function_line). Fields that are
    not found keep the placeholders "<unknown>", "unknown" and "?". Note that
    the returned name comes from the dump output, not from the argument.

    The tool's exit status is deliberately ignored (check=False): a failed
    lookup simply yields the placeholders.
    """
    # Argument list (not a re-split command string) keeps paths and names
    # containing spaces, quotes or backslashes intact.
    p = subprocess.run(
        [str(dwarf_dump), f"--name={function_name}", str(wasm_file)],
        check=False,
        capture_output=True,
        text=True,
    )

    function_name, function_file = "<unknown>", "unknown"
    function_line = "?"

    # Text mode already normalizes newlines, so split on them directly rather
    # than on the platform-specific os.linesep.
    for line in p.stdout.splitlines():
        line = line.strip()

        if "DW_AT_name" in line:
            function_name = get_dwarf_tag_value("DW_AT_name", line)

        if "DW_AT_decl_file" in line:
            function_file = get_dwarf_tag_value("DW_AT_decl_file", line)

        if "DW_AT_decl_line" in line:
            function_line = get_dwarf_tag_value("DW_AT_decl_line", line)

    return (function_name, function_file, function_line)
 | |
| 
 | |
| 
 | |
def get_line_info_from_function_addr_sourcemapping(
    emsymbolizer: Path, wasm_file: Path, offset: int
) -> tuple[str, str, str, str]:
    """
    Find the location info of a given offset in a wasm file which is compiled with emcc.

    Runs:
      {emsymbolizer} -t code -f {wasm_file}.map {wasm_file} {offset}

    There usually are two output lines:
      ??
      relative path to source file:line:column

    Returns (function_name, function_file, function_line, function_column),
    all as strings. Fields that are not found keep the placeholders
    "<unknown>", "unknown", "?" and "?".

    The tool's exit status is deliberately ignored (check=False).
    """
    debug_info_source = wasm_file.with_name(f"{wasm_file.name}.map")

    # Argument list (not a re-split command string) keeps paths with spaces,
    # quotes or backslashes intact.
    p = subprocess.run(
        [
            str(emsymbolizer),
            "-t",
            "code",
            "-f",
            str(debug_info_source),
            str(wasm_file),
            str(offset),
        ],
        check=False,
        capture_output=True,
        text=True,
    )

    function_name, function_file = "<unknown>", "unknown"
    function_line, function_column = "?", "?"

    for line in p.stdout.splitlines():
        line = line.strip()
        if not line:
            continue

        # Raw string: "\d" in a plain literal is an invalid escape sequence.
        m = re.match(r"(.*):(\d+):(\d+)", line)
        if m:
            function_file, function_line, function_column = m.groups()
        elif line != "??":
            # Any other non-empty line is taken as the function name;
            # "??" means the symbolizer could not name the function.
            function_name = line

    return (function_name, function_file, function_line, function_column)
 | |
| 
 | |
| 
 | |
def parse_line_info(line_info: str) -> tuple[str, int, int]:
    """
    line_info -> (file, line, column)

    Parses text of the form
      Line info: file '<file>', line <N>, column <M>
    found anywhere inside *line_info*. The line and column are returned as
    integers.

    Raises ValueError if *line_info* does not contain that pattern.
    """
    PATTERN = r"Line info: file \'(.+)\', line ([0-9]+), column ([0-9]+)"
    m = re.search(PATTERN, line_info)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if m is None:
        raise ValueError(f"unrecognized line info: {line_info!r}")

    file, line, column = m.groups()
    return (file, int(line), int(column))
 | |
| 
 | |
| 
 | |
def parse_call_stack_line(line: str) -> tuple[str, str, str]:
    """
    Split one call-stack frame into (frame number, address, function).

    Frames with an address (WAMR > 1.3.2; also the text format produced with
    -DWAMR_BUILD_LOAD_CUSTOM_SECTION=1 -DWAMR_BUILD_CUSTOM_NAME_SECTION=1,
    and the always-named _start). The address is returned as hex digits
    without the "0x" prefix:
      #00: 0x0a04 - $f18   => ("00", "0a04", "$f18")
      #02: 0x0200 - a      => ("02", "0200", "a")
      #05: 0x011f - _start => ("05", "011f", "_start")

    Frames without an address (old format), where the address slot is None:
      #00 $f18             => ("00", None, "$f18")

    Anything else yields None.
    """
    with_address = re.match(r"#([0-9]+): 0x([0-9a-f]+) - (\S+)", line)
    if with_address:
        return with_address.groups()

    without_address = re.match(r"#([0-9]+) (\S+)", line)
    if without_address is None:
        return None

    frame_number, function = without_address.groups()
    return (frame_number, None, function)
 | |
| 
 | |
| 
 | |
def parse_module_functions(wasm_objdump: Path, wasm_file: Path) -> dict[str, str]:
    """
    Map function indexes to function names for a wasm file.

    Runs ``<wasm_objdump> -x <wasm_file> --section=function`` and collects
    every output line of the shape ``... func[<index>] ... <<name>>``.

    Returns a dict whose keys are the decimal index strings and whose values
    are the names. ``func[...]`` lines that carry no ``<name>`` are skipped,
    so such functions are simply absent from the mapping.

    Raises subprocess.CalledProcessError if the tool exits with a non-zero
    status.
    """
    PATTERN = re.compile(r".*func\[([0-9]+)\].*<(.*)>")
    function_index_to_name = {}

    # Argument list (not a re-split command string) keeps paths with spaces,
    # quotes or backslashes intact.
    p = subprocess.run(
        [str(wasm_objdump), "-x", str(wasm_file), "--section=function"],
        check=True,
        capture_output=True,
        text=True,
    )

    for line in p.stdout.splitlines():
        if "func[" not in line:
            continue

        m = PATTERN.match(line)
        if m is None:
            # A function entry without a "<name>" part: nothing to record.
            # This used to abort the whole run through an assert.
            continue

        index, name = m.groups()
        function_index_to_name[index] = name

    return function_index_to_name
 | |
| 
 | |
| 
 | |
def demangle(cxxfilt: Path, function_name: str) -> str:
    """
    Demangle *function_name* by running ``<cxxfilt> -n <function_name>``.

    Returns the tool's standard output with surrounding whitespace removed.
    Raises subprocess.CalledProcessError if the tool exits with a non-zero
    status.
    """
    # Pass the name as a single argv entry. Re-splitting a formatted command
    # string broke names containing spaces (they became several arguments)
    # and raised ValueError on names containing an unbalanced quote.
    p = subprocess.run(
        [str(cxxfilt), "-n", function_name],
        check=True,
        capture_output=True,
        text=True,
    )
    return p.stdout.strip()
 | |
| 
 | |
| 
 | |
def main():
    """
    Command-line entry point: symbolize each frame of an iwasm call-stack dump.

    Parses the command line, locates wasm-objdump (wabt), llvm-dwarfdump and
    llvm-cxxfilt (wasi-sdk) and, for wasm files that carry a sourceMappingURL
    section, emsymbolizer (emsdk). It then prints the function name and source
    location for every frame found in the call stack file. Lines that are not
    call-stack frames are echoed unchanged.

    Returns 0 on success, or -1 when a required tool or input file is missing
    or the wasm file has no Code section.
    """
    parser = argparse.ArgumentParser(description="addr2line for wasm")
    # These three options are always dereferenced below, so let argparse
    # reject a missing one instead of failing later on `None.joinpath`.
    parser.add_argument(
        "--wasi-sdk", type=Path, required=True, help="path to wasi-sdk"
    )
    parser.add_argument("--wabt", type=Path, required=True, help="path to wabt")
    parser.add_argument(
        "--wasm-file", type=Path, required=True, help="path to wasm file"
    )
    parser.add_argument("call_stack_file", type=Path, help="path to a call stack file")
    parser.add_argument(
        "--no-addr",
        action="store_true",
        help="use call stack without addresses or from fast interpreter mode",
    )
    parser.add_argument("--emsdk", type=Path, help="path to emsdk")
    args = parser.parse_args()

    wasm_objdump = args.wabt.joinpath("bin/wasm-objdump")
    llvm_dwarf_dump = args.wasi_sdk.joinpath("bin/llvm-dwarfdump")
    llvm_cxxfilt = args.wasi_sdk.joinpath("bin/llvm-cxxfilt")

    # Explicit checks instead of `assert`, which is stripped under `python -O`
    # and only produces a bare traceback otherwise.
    for required_path in (
        wasm_objdump,
        llvm_dwarf_dump,
        llvm_cxxfilt,
        args.wasm_file,
        args.call_stack_file,
    ):
        if not required_path.exists():
            print(f"error: {required_path} does not exist", file=sys.stderr)
            return -1

    emsymbolizer = None
    emcc_production = locate_sourceMappingURL_section(wasm_objdump, args.wasm_file)
    if emcc_production:
        if args.emsdk is None:
            print("Please provide the path to emsdk via --emsdk")
            return -1

        emsymbolizer = args.emsdk.joinpath("upstream/emscripten/emsymbolizer")
        if not emsymbolizer.exists():
            print(f"error: {emsymbolizer} does not exist", file=sys.stderr)
            return -1

    code_section_start = get_code_section_start(wasm_objdump, args.wasm_file)
    if code_section_start == -1:
        print(f"error: no Code section found in {args.wasm_file}", file=sys.stderr)
        return -1

    function_index_to_name = parse_module_functions(wasm_objdump, args.wasm_file)

    with open(args.call_stack_file, "rt", encoding="ascii") as f:
        for i, line in enumerate(f):
            line = line.strip()
            if not line:
                continue

            splitted = parse_call_stack_line(line)
            if splitted is None:
                # Not a call-stack frame: echo it unchanged.
                print(f"{line}")
                continue

            _, offset, index = splitted
            if args.no_addr:
                # FIXME: w/ emcc production
                if not index.startswith("$f"):  # E.g. _start or Text format
                    print(f"{i}: {index}")
                    continue
                index = index[2:]

                if index not in function_index_to_name:
                    print(f"{i}: {line}")
                    continue

                if not emcc_production:
                    _, function_file, function_line = (
                        get_line_info_from_function_name_dwarf(
                            llvm_dwarf_dump,
                            args.wasm_file,
                            function_index_to_name[index],
                        )
                    )
                else:
                    # No by-name lookup is implemented for emcc builds.
                    function_file, function_line = "unknown", "?"

                function_name = demangle(llvm_cxxfilt, function_index_to_name[index])
                print(f"{i}: {function_name}")
                print(f"\tat {function_file}:{function_line}")
            else:
                if offset is None:
                    # Old-format frame ("#NN $fX") carries no address, so it
                    # can only be resolved with --no-addr. Echo it instead of
                    # crashing in int(None, 16).
                    print(f"{i}: {line}")
                    continue

                offset = int(offset, 16)
                # match the algorithm in wasm_interp_create_call_stack()
                # either a *offset* to *code* section start
                # or a *offset* in a file
                if offset <= code_section_start:
                    print(
                        f"warning: address 0x{offset:x} is not inside the Code section",
                        file=sys.stderr,
                    )
                    print(f"{i}: {line}")
                    continue
                offset = offset - code_section_start

                if emcc_production:
                    function_name, function_file, function_line, function_column = (
                        get_line_info_from_function_addr_sourcemapping(
                            emsymbolizer, args.wasm_file, offset
                        )
                    )
                else:
                    function_name, function_file, function_line, function_column = (
                        get_line_info_from_function_addr_dwarf(
                            llvm_dwarf_dump, args.wasm_file, offset
                        )
                    )

                # if can't parse function_name, use name section or <index>
                if function_name == "<unknown>":
                    if index.startswith("$f"):
                        function_name = function_index_to_name.get(index[2:], index)
                    else:
                        function_name = index

                function_name = demangle(llvm_cxxfilt, function_name)

                print(f"{i}: {function_name}")
                print(f"\tat {function_file}:{function_line}:{function_column}")

    return 0
 | |
| 
 | |
| 
 | |
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    raise SystemExit(main())
 | 
