Fix some relocation type issues on Windows (#574)

Implement the Windows PE file relocation types IMAGE_REL_AMD64_ADDR64/ADDR32/REL32, implement relocation for the symbols "__xmm@xxx", "__real@xxx" and ".rdata", implement the Windows invokeNative SIMD asm code, and enable SIMD by default for the Windows platform. Also update the wamrc tool.

Signed-off-by: Wenyong Huang <wenyong.huang@intel.com>
This commit is contained in:
Wenyong Huang 2021-03-16 03:59:16 -05:00 committed by GitHub
parent afa1feb1a8
commit fda3a26903
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 385 additions and 30 deletions

View File

@ -15,6 +15,8 @@
#include "../interpreter/wasm_loader.h"
#endif
#define XMM_PLT_PREFIX "__xmm@"
#define REAL_PLT_PREFIX "__real@"
static void
set_error_buf(char *error_buf, uint32 error_buf_size, const char *string)
@ -1280,12 +1282,13 @@ get_data_section_addr(AOTModule *module, const char *section_name,
uint32 i;
AOTObjectDataSection *data_section = module->data_sections;
for (i = 0; i < module->data_section_count; i++, data_section++)
for (i = 0; i < module->data_section_count; i++, data_section++) {
if (!strcmp(data_section->name, section_name)) {
if (p_data_size)
*p_data_size = data_section->size;
return data_section->data;
}
}
return NULL;
}
@ -1313,6 +1316,53 @@ is_literal_relocation(const char *reloc_sec_name)
return !strcmp(reloc_sec_name, ".rela.literal");
}
#if defined(BH_PLATFORM_WINDOWS)
/**
 * Convert exactly eight hexadecimal digits into a 32-bit value.
 *
 * @param buf   text to parse; the first eight characters are consumed,
 *              upper- and lower-case hex digits are both accepted
 * @param p_res receives the parsed value on success, left untouched on
 *              failure
 *
 * @return true if all eight characters were hex digits, false otherwise
 */
static bool
str2uint32(const char *buf, uint32 *p_res)
{
    uint32 result = 0;
    uint32 idx;

    for (idx = 0; idx < 8; idx++) {
        char c = buf[idx];
        uint32 digit;

        if (c >= '0' && c <= '9') {
            digit = c - '0';
        }
        else if (c >= 'a' && c <= 'f') {
            digit = c - 'a' + 0xA;
        }
        else if (c >= 'A' && c <= 'F') {
            digit = c - 'A' + 0xA;
        }
        else {
            /* Any non-hex character, including an early NUL terminator,
               aborts the conversion before reading further */
            return false;
        }

        /* Shift in the next nibble, most significant digit first */
        result = (result << 4) | digit;
    }

    *p_res = result;
    return true;
}
/**
 * Convert exactly sixteen hexadecimal digits into a 64-bit value.
 *
 * @param buf   text to parse; the first sixteen characters are consumed,
 *              upper- and lower-case hex digits are both accepted
 * @param p_res receives the parsed value on success, left untouched on
 *              failure
 *
 * @return true if all sixteen characters were hex digits, false otherwise
 */
static bool
str2uint64(const char *buf, uint64 *p_res)
{
    uint64 result = 0;
    uint32 idx;

    for (idx = 0; idx < 16; idx++) {
        char c = buf[idx];
        uint64 digit;

        if (c >= '0' && c <= '9') {
            digit = c - '0';
        }
        else if (c >= 'a' && c <= 'f') {
            digit = c - 'a' + 0xA;
        }
        else if (c >= 'A' && c <= 'F') {
            digit = c - 'A' + 0xA;
        }
        else {
            /* Any non-hex character, including an early NUL terminator,
               aborts the conversion before reading further */
            return false;
        }

        /* Shift in the next nibble, most significant digit first */
        result = (result << 4) | digit;
    }

    *p_res = result;
    return true;
}
#endif
static bool
do_text_relocation(AOTModule *module,
AOTRelocationGroup *group,
@ -1322,6 +1372,9 @@ do_text_relocation(AOTModule *module,
uint8 *aot_text = is_literal ? module->literal : module->code;
uint32 aot_text_size = is_literal ? module->literal_size : module->code_size;
uint32 i, func_index, symbol_len;
#if defined(BH_PLATFORM_WINDOWS)
uint32 xmm_plt_index = 0, real_plt_index = 0, float_plt_index = 0;
#endif
char symbol_buf[128] = { 0 }, *symbol, *p;
void *symbol_addr;
AOTRelocation *relocation = group->relocations;
@ -1360,6 +1413,7 @@ do_text_relocation(AOTModule *module,
symbol_addr = module->code;
}
else if (!strcmp(symbol, ".data")
|| !strcmp(symbol, ".rdata")
|| !strcmp(symbol, ".rodata")
/* ".rodata.cst4/8/16/.." */
|| !strncmp(symbol, ".rodata.cst", strlen(".rodata.cst"))) {
@ -1373,9 +1427,66 @@ do_text_relocation(AOTModule *module,
else if (!strcmp(symbol, ".literal")) {
symbol_addr = module->literal;
}
#if defined(BH_PLATFORM_WINDOWS)
        /* "__xmm@" followed by 32 hex digits: the constant's value is
           encoded in the symbol name itself. Decode it into the next free
           16-byte slot of the extra plt data area and relocate against
           that slot. */
        else if (!strcmp(group->section_name, ".text")
                 && !strncmp(symbol, XMM_PLT_PREFIX, strlen(XMM_PLT_PREFIX))
                 && strlen(symbol) == strlen(XMM_PLT_PREFIX) + 32) {
            char xmm_buf[17] = { 0 };

            symbol_addr = module->extra_plt_data + xmm_plt_index * 16;
            /* The trailing 16 hex digits are stored at offset 0 of the slot */
            bh_memcpy_s(xmm_buf, sizeof(xmm_buf),
                        symbol + strlen(XMM_PLT_PREFIX) + 16, 16);
            if (!str2uint64(xmm_buf, (uint64*)symbol_addr)) {
                /* Formatted message: must go through set_error_buf_v with
                   the real buffer size */
                set_error_buf_v(error_buf, error_buf_size,
                                "resolve symbol %s failed", symbol);
                goto check_symbol_fail;
            }

            /* The leading 16 hex digits are stored at offset 8 of the slot */
            bh_memcpy_s(xmm_buf, sizeof(xmm_buf),
                        symbol + strlen(XMM_PLT_PREFIX), 16);
            if (!str2uint64(xmm_buf, (uint64*)((uint8*)symbol_addr + 8))) {
                set_error_buf_v(error_buf, error_buf_size,
                                "resolve symbol %s failed", symbol);
                goto check_symbol_fail;
            }
            xmm_plt_index++;
        }
        /* "__real@" followed by 16 hex digits: an 8-byte constant, placed
           after all of the 16-byte xmm slots. */
        else if (!strcmp(group->section_name, ".text")
                 && !strncmp(symbol, REAL_PLT_PREFIX, strlen(REAL_PLT_PREFIX))
                 && strlen(symbol) == strlen(REAL_PLT_PREFIX) + 16) {
            char real_buf[17] = { 0 };

            symbol_addr = module->extra_plt_data + module->xmm_plt_count * 16
                          + real_plt_index * 8;
            bh_memcpy_s(real_buf, sizeof(real_buf),
                        symbol + strlen(REAL_PLT_PREFIX), 16);
            if (!str2uint64(real_buf, (uint64*)symbol_addr)) {
                set_error_buf_v(error_buf, error_buf_size,
                                "resolve symbol %s failed", symbol);
                goto check_symbol_fail;
            }
            real_plt_index++;
        }
        /* "__real@" followed by 8 hex digits: a 4-byte constant, placed
           after all of the xmm slots and all of the 8-byte real slots. */
        else if (!strcmp(group->section_name, ".text")
                 && !strncmp(symbol, REAL_PLT_PREFIX, strlen(REAL_PLT_PREFIX))
                 && strlen(symbol) == strlen(REAL_PLT_PREFIX) + 8) {
            char float_buf[9] = { 0 };

            symbol_addr = module->extra_plt_data + module->xmm_plt_count * 16
                          + module->real_plt_count * 8 + float_plt_index * 4;
            bh_memcpy_s(float_buf, sizeof(float_buf),
                        symbol + strlen(REAL_PLT_PREFIX), 8);
            if (!str2uint32(float_buf, (uint32*)symbol_addr)) {
                set_error_buf_v(error_buf, error_buf_size,
                                "resolve symbol %s failed", symbol);
                goto check_symbol_fail;
            }
            float_plt_index++;
        }
#endif /* end of defined(BH_PLATFORM_WINDOWS) */
else if (!(symbol_addr = resolve_target_sym(symbol, &symbol_index))) {
set_error_buf_v(error_buf, error_buf_size,
"resolve symbol %s failed", symbol);
"resolve symbol %s failed", symbol);
goto check_symbol_fail;
}
@ -1418,6 +1529,9 @@ do_data_relocation(AOTModule *module,
else if (!strncmp(group->section_name, ".rel.", 5)) {
data_section_name = group->section_name + strlen(".rel");
}
else if (!strcmp(group->section_name, ".rdata")) {
data_section_name = group->section_name;
}
else {
set_error_buf(error_buf, error_buf_size,
"invalid data relocation section name");
@ -1426,6 +1540,7 @@ do_data_relocation(AOTModule *module,
data_addr = get_data_section_addr(module, data_section_name,
&data_size);
if (group->relocation_count > 0 && !data_addr) {
set_error_buf(error_buf, error_buf_size,
"invalid data relocation count");
@ -1514,6 +1629,106 @@ load_relocation_section(const uint8 *buf, const uint8 *buf_end,
goto fail;
}
#if defined(BH_PLATFORM_WINDOWS)
    /* Pre-scan pass: walk every relocation group once to count the
       "__xmm@"/"__real@" constant symbols referenced from ".text", so the
       extra plt data area can be sized and mapped here. The relocation
       records are only parsed to advance buf; nothing is applied. */
    buf = symbol_buf_end;
    read_uint32(buf, buf_end, group_count);

    for (i = 0; i < group_count; i++) {
        uint32 name_index, relocation_count;
        uint16 group_name_len;
        uint8 *group_name;

        /* section name address is 4 bytes aligned. */
        buf = (uint8*)align_ptr(buf, sizeof(uint32));
        read_uint32(buf, buf_end, name_index);

        if (name_index >= symbol_count) {
            set_error_buf(error_buf, error_buf_size,
                          "symbol index out of range");
            goto fail;
        }

        /* Each symbol entry is a uint16 length followed by the name bytes */
        group_name = symbol_buf + symbol_offsets[name_index];
        group_name_len = *(uint16 *)group_name;
        group_name += sizeof(uint16);

        read_uint32(buf, buf_end, relocation_count);

        for (j = 0; j < relocation_count; j++) {
            AOTRelocation relocation = { 0 };
            uint32 symbol_index, offset32, addend32;
            uint16 symbol_name_len;
            uint8 *symbol_name;

            /* Offset and addend are pointer-sized in the file */
            if (sizeof(void *) == 8) {
                read_uint64(buf, buf_end, relocation.relocation_offset);
                read_uint64(buf, buf_end, relocation.relocation_addend);
            }
            else {
                read_uint32(buf, buf_end, offset32);
                relocation.relocation_offset = (uint64)offset32;
                read_uint32(buf, buf_end, addend32);
                relocation.relocation_addend = (uint64)addend32;
            }
            read_uint32(buf, buf_end, relocation.relocation_type);
            read_uint32(buf, buf_end, symbol_index);

            if (symbol_index >= symbol_count) {
                set_error_buf(error_buf, error_buf_size,
                              "symbol index out of range");
                goto fail;
            }

            symbol_name = symbol_buf + symbol_offsets[symbol_index];
            symbol_name_len = *(uint16 *)symbol_name;
            symbol_name += sizeof(uint16);

            /* The names are compared in place using their stored lengths.
               They are deliberately not copied into fixed-size stack
               buffers: the lengths come from the file and can exceed any
               such buffer. */
            if (group_name_len == strlen(".text")
                && !strncmp(group_name, ".text", strlen(".text"))) {
                if (symbol_name_len == strlen(XMM_PLT_PREFIX) + 32
                    && !strncmp(symbol_name, XMM_PLT_PREFIX,
                                strlen(XMM_PLT_PREFIX))) {
                    module->xmm_plt_count++;
                }
                else if (symbol_name_len == strlen(REAL_PLT_PREFIX) + 16
                         && !strncmp(symbol_name, REAL_PLT_PREFIX,
                                     strlen(REAL_PLT_PREFIX))) {
                    module->real_plt_count++;
                }
                else if (symbol_name_len == strlen(REAL_PLT_PREFIX) + 8
                         && !strncmp(symbol_name, REAL_PLT_PREFIX,
                                     strlen(REAL_PLT_PREFIX))) {
                    module->float_plt_count++;
                }
            }
        }
    }

    /* Allocate memory for extra plt data: 16 bytes per xmm constant,
       8 bytes per 64-bit real constant, 4 bytes per 32-bit real constant */
    size = sizeof(uint64) * 2 * module->xmm_plt_count
           + sizeof(uint64) * module->real_plt_count
           + sizeof(uint32) * module->float_plt_count;
    if (size > 0) {
        int map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE | MMAP_PROT_EXEC;
        /* aot code and data in x86_64 must be in range 0 to 2G due to
           relocation for R_X86_64_32/32S/PC32 */
        int map_flags = MMAP_MAP_32BIT;

        if (size > UINT32_MAX
            || !(module->extra_plt_data = os_mmap(NULL, (uint32)size,
                                                  map_prot, map_flags))) {
            set_error_buf(error_buf, error_buf_size, "mmap memory failed");
            goto fail;
        }
        module->extra_plt_data_size = (uint32)size;
    }
#endif /* end of defined(BH_PLATFORM_WINDOWS) */
buf = symbol_buf_end;
read_uint32(buf, buf_end, group_count);
@ -1614,6 +1829,8 @@ load_relocation_section(const uint8 *buf, const uint8 *buf_end,
}
}
/* TODO: set code and data read only */
ret = true;
fail:
@ -2304,10 +2521,17 @@ aot_unload(AOTModule *module)
if (module->code) {
uint8 *mmap_addr = module->literal - sizeof(module->literal_size);
uint32 total_size = sizeof(module->literal_size) + module->literal_size + module->code_size;
uint32 total_size = sizeof(module->literal_size)
+ module->literal_size + module->code_size;
os_munmap(mmap_addr, total_size);
}
#if defined(BH_PLATFORM_WINDOWS)
if (module->extra_plt_data) {
os_munmap(module->extra_plt_data, module->extra_plt_data_size);
}
#endif
if (module->data_sections)
destroy_object_data_sections(module->data_sections,
module->data_section_count);

View File

@ -159,6 +159,17 @@ typedef struct AOTModule {
uint8 *literal;
uint32 literal_size;
#if (defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)) \
&& defined(BH_PLATFORM_WINDOWS)
/* extra plt data area for __xmm and __real constants
in Windows platform, NULL for JIT mode */
uint8 *extra_plt_data;
uint32 extra_plt_data_size;
uint32 xmm_plt_count;
uint32 real_plt_count;
uint32 float_plt_count;
#endif
/* data sections in AOT object file, including .data, .rodata
* and .rodata.cstN. NULL for JIT mode. */
AOTObjectDataSection *data_sections;

View File

@ -5,16 +5,20 @@
#include "aot_reloc.h"
#define R_X86_64_64 1 /* Direct 64 bit */
#define R_X86_64_PC32 2 /* PC relative 32 bit signed */
#define R_X86_64_PLT32 4 /* 32 bit PLT address */
#define R_X86_64_32 10 /* Direct 32 bit zero extended */
#define R_X86_64_32S 11 /* Direct 32 bit sign extended */
#if !defined(BH_PLATFORM_WINDOWS)
#define R_X86_64_64 1 /* Direct 64 bit */
#define R_X86_64_PC32 2 /* PC relative 32 bit signed */
#define R_X86_64_PLT32 4 /* 32 bit PLT address */
#define R_X86_64_32 10 /* Direct 32 bit zero extended */
#define R_X86_64_32S 11 /* Direct 32 bit sign extended */
#else
#define IMAGE_REL_AMD64_ADDR64 1 /* The 64-bit VA of the relocation target */
#define IMAGE_REL_AMD64_ADDR32 2 /* The 32-bit VA of the relocation target */
#define IMAGE_REL_AMD64_REL32 4 /* The 32-bit relative address from
the byte following the relocation*/
#endif
#define IMAGE_REL_AMD64_REL32 4 /* The 32-bit relative address from
the byte following the relocation */
#if defined(_WIN64) || defined(_WIN64_)
#if defined(BH_PLATFORM_WINDOWS)
#pragma function (floor)
#pragma function (ceil)
#pragma function (floorf)
@ -98,7 +102,11 @@ apply_relocation(AOTModule *module,
char *error_buf, uint32 error_buf_size)
{
switch (reloc_type) {
#if !defined(BH_PLATFORM_WINDOWS)
case R_X86_64_64:
#else
case IMAGE_REL_AMD64_ADDR64:
#endif
{
intptr_t value;
@ -108,6 +116,29 @@ apply_relocation(AOTModule *module,
= (uint8*)symbol_addr + reloc_addend + value; /* S + A */
break;
}
#if defined(BH_PLATFORM_WINDOWS)
case IMAGE_REL_AMD64_ADDR32:
{
int32 value;
uintptr_t target_addr;
CHECK_RELOC_OFFSET(sizeof(void *));
value = *(int32*)(target_section_addr + (uint32)reloc_offset);
target_addr = (uintptr_t)symbol_addr + reloc_addend + value;
if ((int32)target_addr != target_addr) {
set_error_buf(
error_buf, error_buf_size,
"AOT module load failed: "
"relocation truncated to fit IMAGE_REL_AMD64_ADDR32 failed. "
"Try using wamrc with --size-level=1 option.");
return false;
}
*(int32 *)(target_section_addr + reloc_offset) = (int32)target_addr;
break;
}
#endif
#if !defined(BH_PLATFORM_WINDOWS)
case R_X86_64_PC32:
{
intptr_t target_addr = (intptr_t) /* S + A - P */
@ -152,7 +183,12 @@ apply_relocation(AOTModule *module,
*(int32*)(target_section_addr + reloc_offset) = (int32)target_addr;
break;
}
#endif
#if !defined(BH_PLATFORM_WINDOWS)
case R_X86_64_PLT32:
#else
case IMAGE_REL_AMD64_REL32:
#endif
{
uint8 *plt;
intptr_t target_addr = 0;
@ -172,16 +208,21 @@ apply_relocation(AOTModule *module,
- (target_section_addr + reloc_offset));
}
#if defined(BH_PLATFORM_WINDOWS)
target_addr -= sizeof(int32);
#endif
if ((int32)target_addr != target_addr) {
set_error_buf(error_buf, error_buf_size,
"AOT module load failed: "
"relocation truncated to fit R_X86_64_PC32 failed. "
"relocation truncated to fit "
#if !defined(BH_PLATFORM_WINDOWS)
"R_X86_64_PLT32 failed. "
#else
"IMAGE_REL_AMD64_32 failed."
#endif
"Try using wamrc with --size-level=1 option.");
return false;
}
#ifdef BH_PLATFORM_WINDOWS
target_addr -= sizeof(int32);
#endif
*(int32*)(target_section_addr + reloc_offset) = (int32)target_addr;
break;
}

View File

@ -0,0 +1,62 @@
;
; Copyright (C) 2019 Intel Corporation. All rights reserved.
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
;
_TEXT SEGMENT
; invokeNative: call a native function with arguments unpacked from argv.
;
; Inputs:
; rcx func_ptr
; rdx argv
; r8 n_stacks
;
; argv layout, as read by this routine:
;   bytes  0..63  four 16-byte values loaded into xmm0..xmm3
;   bytes 64..95  four 8-byte values loaded into rcx, rdx, r8, r9
;   bytes 96..    n_stacks 8-byte values pushed onto the stack
;
; rax and xmm0 are not touched after the call, so the callee's return
; value is passed straight back to the caller.
invokeNative PROC
; set up a frame so that "leave" can discard everything pushed below
push rbp
mov rbp, rsp
mov r10, rcx ; func_ptr
mov rax, rdx ; argv
mov rcx, r8 ; n_stacks
; fill all fp args
movdqu xmm0, xmmword ptr [rax + 0]
movdqu xmm1, xmmword ptr [rax + 16]
movdqu xmm2, xmmword ptr [rax + 32]
movdqu xmm3, xmmword ptr [rax + 48]
; check for stack args
cmp rcx, 0
jz cycle_end
; trap with a breakpoint if rsp is not 16-byte aligned at this point
mov rdx, rsp
and rdx, 15
jz no_abort
int 3
no_abort:
; reserve 8 bytes of padding when n_stacks is odd, so that rsp is
; 16-byte aligned again after all stack args have been pushed
mov rdx, rcx
and rdx, 1
shl rdx, 3
sub rsp, rdx
; store stack args
; r9 = address of the last stack arg (argv + 96 + (n_stacks - 1) * 8)
; minus rsp; each push lowers rsp by 8, so [rsp + r9] steps backwards
; through the stack args and they are pushed last-to-first
lea r9, qword ptr [rax + rcx * 8 + 88]
sub r9, rsp ; offset
cycle:
push qword ptr [rsp + r9]
loop cycle ; decrements rcx, repeats n_stacks times
cycle_end:
; load the four integer register args
mov rcx, [rax + 64]
mov rdx, [rax + 72]
mov r8, [rax + 80]
mov r9, [rax + 88]
sub rsp, 32 ; shadow space
call r10
; restore rsp/rbp, dropping shadow space, stack args and padding
leave
ret
invokeNative ENDP
_TEXT ENDS
END

View File

@ -19,7 +19,7 @@ if (WAMR_BUILD_TARGET STREQUAL "X86_64" OR WAMR_BUILD_TARGET STREQUAL "AMD_64")
endif ()
else ()
if (WAMR_BUILD_PLATFORM STREQUAL "windows")
message(FATAL_ERROR "need an implementation of SIMD on windows")
set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_em64_simd.asm)
else()
set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_em64_simd.s)
endif()

View File

@ -1675,16 +1675,24 @@ aot_resolve_literal(AOTObjectData *obj_data)
}
static bool
is_data_section(char *section_name)
get_relocations_count(LLVMSectionIteratorRef sec_itr, uint32 *p_count);
static bool
is_data_section(LLVMSectionIteratorRef sec_itr, char *section_name)
{
uint32 relocation_count = 0;
return (!strcmp(section_name, ".data")
|| !strcmp(section_name, ".rodata")
/* ".rodata.cst4/8/16/.." */
|| !strncmp(section_name, ".rodata.cst", strlen(".rodata.cst")));
|| !strncmp(section_name, ".rodata.cst", strlen(".rodata.cst"))
|| (!strcmp(section_name, ".rdata")
&& get_relocations_count(sec_itr, &relocation_count)
&& relocation_count > 0));
}
static uint32
get_object_data_sections_count(AOTObjectData *obj_data)
static bool
get_object_data_sections_count(AOTObjectData *obj_data, uint32 *p_count)
{
LLVMSectionIteratorRef sec_itr;
char *name;
@ -1692,18 +1700,19 @@ get_object_data_sections_count(AOTObjectData *obj_data)
if (!(sec_itr = LLVMObjectFileCopySectionIterator(obj_data->binary))) {
aot_set_last_error("llvm get section iterator failed.");
return 0;
return false;
}
while (!LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary, sec_itr)) {
if ((name = (char *)LLVMGetSectionName(sec_itr))
&& (is_data_section(name))) {
&& (is_data_section(sec_itr, name))) {
count++;
}
LLVMMoveToNextSection(sec_itr);
}
LLVMDisposeSectionIterator(sec_itr);
return count;
*p_count = count;
return true;
}
static bool
@ -1712,9 +1721,13 @@ aot_resolve_object_data_sections(AOTObjectData *obj_data)
LLVMSectionIteratorRef sec_itr;
char *name;
AOTObjectDataSection *data_section;
uint32 sections_count = get_object_data_sections_count(obj_data);
uint32 sections_count;
uint32 size;
if (!get_object_data_sections_count(obj_data, &sections_count)) {
return false;
}
if (sections_count > 0) {
size = (uint32)sizeof(AOTObjectDataSection) * sections_count;
if (!(data_section = obj_data->data_sections = wasm_runtime_malloc(size))) {
@ -1728,10 +1741,9 @@ aot_resolve_object_data_sections(AOTObjectData *obj_data)
aot_set_last_error("llvm get section iterator failed.");
return false;
}
while (!LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary,
sec_itr)) {
while (!LLVMObjectFileIsSectionIteratorAtEnd(obj_data->binary, sec_itr)) {
if ((name = (char *)LLVMGetSectionName(sec_itr))
&& (is_data_section(name))) {
&& (is_data_section(sec_itr, name))) {
data_section->name = name;
data_section->data = (uint8 *)LLVMGetSectionContents(sec_itr);
data_section->size = (uint32)LLVMGetSectionSize(sec_itr);
@ -1949,7 +1961,7 @@ is_relocation_section(LLVMSectionIteratorRef sec_itr)
if (name) {
if (is_relocation_section_name(name))
return true;
else if (!strncmp(name, ".text", strlen(".text"))
else if ((!strcmp(name, ".text") || !strcmp(name, ".rdata"))
&& get_relocations_count(sec_itr, &count) && count > 0)
return true;
}

View File

@ -58,7 +58,7 @@ endif ()
if (NOT DEFINED WAMR_BUILD_FAST_INTERP)
# Enable fast interpreter
set (WAMR_BUILD_FAST_INTERP 0)
set (WAMR_BUILD_FAST_INTERP 1)
endif ()
if (NOT DEFINED WAMR_BUILD_MULTI_MODULE)
@ -76,6 +76,11 @@ if (NOT DEFINED WAMR_BUILD_MINI_LOADER)
set (WAMR_BUILD_MINI_LOADER 0)
endif ()
if (NOT DEFINED WAMR_BUILD_SIMD)
# Enable SIMD by default
set (WAMR_BUILD_SIMD 1)
endif ()
if (COLLECT_CODE_COVERAGE EQUAL 1)
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fprofile-arcs -ftest-coverage")
endif ()