[refactoring] Extract read leb to a separate file, share the code between loader and mini loader (#3701)

There's probably a number of other places where the bh_leb_read could be used (e.g. aot loader)
but I'm making the change as small as possible. Further refactoring can be done later.
This commit is contained in:
Marcin Kolny 2024-08-14 01:46:24 +01:00 committed by GitHub
parent a83adccd20
commit 37d7439ef9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 223 additions and 159 deletions

View File

@ -3,14 +3,15 @@
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#include "wasm_loader_common.h"
#include "bh_leb128.h"
#include "bh_log.h"
#if WASM_ENABLE_GC != 0
#include "../common/gc/gc_type.h"
#endif
static void
set_error_buf(char *error_buf, uint32 error_buf_size, const char *string,
bool is_aot)
void
wasm_loader_set_error_buf(char *error_buf, uint32 error_buf_size,
const char *string, bool is_aot)
{
if (error_buf != NULL) {
snprintf(error_buf, error_buf_size, "%s module load failed: %s",
@ -29,30 +30,30 @@ wasm_memory_check_flags(const uint8 mem_flag, char *error_buf,
if (mem_flag & SHARED_MEMORY_FLAG) {
LOG_VERBOSE("shared memory flag was found, please enable shared "
"memory, lib-pthread or lib-wasi-threads");
set_error_buf(error_buf, error_buf_size, "invalid limits flags",
is_aot);
wasm_loader_set_error_buf(error_buf, error_buf_size,
"invalid limits flags", is_aot);
return false;
}
#endif
#if WASM_ENABLE_MEMORY64 == 0
if (mem_flag & MEMORY64_FLAG) {
LOG_VERBOSE("memory64 flag was found, please enable memory64");
set_error_buf(error_buf, error_buf_size, "invalid limits flags",
is_aot);
wasm_loader_set_error_buf(error_buf, error_buf_size,
"invalid limits flags", is_aot);
return false;
}
#endif
}
if (mem_flag > MAX_PAGE_COUNT_FLAG + SHARED_MEMORY_FLAG + MEMORY64_FLAG) {
set_error_buf(error_buf, error_buf_size, "invalid limits flags",
is_aot);
wasm_loader_set_error_buf(error_buf, error_buf_size,
"invalid limits flags", is_aot);
return false;
}
else if ((mem_flag & SHARED_MEMORY_FLAG)
&& !(mem_flag & MAX_PAGE_COUNT_FLAG)) {
set_error_buf(error_buf, error_buf_size,
"shared memory must have maximum", is_aot);
wasm_loader_set_error_buf(error_buf, error_buf_size,
"shared memory must have maximum", is_aot);
return false;
}
@ -130,3 +131,33 @@ is_indices_overflow(uint32 import, uint32 other, char *error_buf,
return false;
}
bool
read_leb(uint8 **p_buf, const uint8 *buf_end, uint32 maxbits, bool sign,
uint64 *p_result, char *error_buf, uint32 error_buf_size)
{
size_t offset = 0;
bh_leb_read_status_t status =
bh_leb_read(*p_buf, buf_end, maxbits, sign, p_result, &offset);
switch (status) {
case BH_LEB_READ_SUCCESS:
*p_buf += offset;
return true;
case BH_LEB_READ_TOO_LONG:
wasm_loader_set_error_buf(error_buf, error_buf_size,
"integer representation too long", false);
return false;
case BH_LEB_READ_OVERFLOW:
wasm_loader_set_error_buf(error_buf, error_buf_size,
"integer too large", false);
return false;
case BH_LEB_READ_UNEXPECTED_END:
wasm_loader_set_error_buf(error_buf, error_buf_size,
"unexpected end", false);
return false;
default:
bh_assert(false);
return false;
}
}

View File

@ -30,6 +30,14 @@ bool
is_indices_overflow(uint32 import, uint32 other, char *error_buf,
uint32 error_buf_size);
bool
read_leb(uint8 **p_buf, const uint8 *buf_end, uint32 maxbits, bool sign,
uint64 *p_result, char *error_buf, uint32 error_buf_size);
void
wasm_loader_set_error_buf(char *error_buf, uint32 error_buf_size,
const char *string, bool is_aot);
#ifdef __cplusplus
}
#endif

View File

@ -56,10 +56,7 @@ has_module_memory64(WASMModule *module)
static void
set_error_buf(char *error_buf, uint32 error_buf_size, const char *string)
{
if (error_buf != NULL) {
snprintf(error_buf, error_buf_size, "WASM module load failed: %s",
string);
}
wasm_loader_set_error_buf(error_buf, error_buf_size, string, false);
}
#if WASM_ENABLE_MEMORY64 != 0
@ -131,82 +128,6 @@ check_buf1(const uint8 *buf, const uint8 *buf_end, uint32 length,
#define skip_leb_int32(p, p_end) skip_leb(p)
#define skip_leb_mem_offset(p, p_end) skip_leb(p)
static bool
read_leb(uint8 **p_buf, const uint8 *buf_end, uint32 maxbits, bool sign,
uint64 *p_result, char *error_buf, uint32 error_buf_size)
{
const uint8 *buf = *p_buf;
uint64 result = 0;
uint32 shift = 0;
uint32 offset = 0, bcnt = 0;
uint64 byte;
while (true) {
/* uN or SN must not exceed ceil(N/7) bytes */
if (bcnt + 1 > (maxbits + 6) / 7) {
set_error_buf(error_buf, error_buf_size,
"integer representation too long");
return false;
}
CHECK_BUF(buf, buf_end, offset + 1);
byte = buf[offset];
offset += 1;
result |= ((byte & 0x7f) << shift);
shift += 7;
bcnt += 1;
if ((byte & 0x80) == 0) {
break;
}
}
if (!sign && maxbits == 32 && shift >= maxbits) {
/* The top bits set represent values > 32 bits */
if (((uint8)byte) & 0xf0)
goto fail_integer_too_large;
}
else if (sign && maxbits == 32) {
if (shift < maxbits) {
/* Sign extend, second-highest bit is the sign bit */
if ((uint8)byte & 0x40)
result |= (~((uint64)0)) << shift;
}
else {
/* The top bits should be a sign-extension of the sign bit */
bool sign_bit_set = ((uint8)byte) & 0x8;
int top_bits = ((uint8)byte) & 0xf0;
if ((sign_bit_set && top_bits != 0x70)
|| (!sign_bit_set && top_bits != 0))
goto fail_integer_too_large;
}
}
else if (sign && maxbits == 64) {
if (shift < maxbits) {
/* Sign extend, second-highest bit is the sign bit */
if ((uint8)byte & 0x40)
result |= (~((uint64)0)) << shift;
}
else {
/* The top bits should be a sign-extension of the sign bit */
bool sign_bit_set = ((uint8)byte) & 0x1;
int top_bits = ((uint8)byte) & 0xfe;
if ((sign_bit_set && top_bits != 0x7e)
|| (!sign_bit_set && top_bits != 0))
goto fail_integer_too_large;
}
}
*p_buf += offset;
*p_result = result;
return true;
fail_integer_too_large:
set_error_buf(error_buf, error_buf_size, "integer too large");
fail:
return false;
}
#define read_uint8(p) TEMPLATE_READ_VALUE(uint8, p)
#define read_uint32(p) TEMPLATE_READ_VALUE(uint32, p)

View File

@ -44,9 +44,7 @@ has_module_memory64(WASMModule *module)
static void
set_error_buf(char *error_buf, uint32 error_buf_size, const char *string)
{
if (error_buf != NULL)
snprintf(error_buf, error_buf_size, "WASM module load failed: %s",
string);
wasm_loader_set_error_buf(error_buf, error_buf_size, string, false);
}
#define CHECK_BUF(buf, buf_end, length) \
@ -95,71 +93,6 @@ is_byte_a_type(uint8 type)
|| (type == VALUE_TYPE_VOID);
}
static void
read_leb(uint8 **p_buf, const uint8 *buf_end, uint32 maxbits, bool sign,
uint64 *p_result, char *error_buf, uint32 error_buf_size)
{
const uint8 *buf = *p_buf;
uint64 result = 0;
uint32 shift = 0;
uint32 offset = 0, bcnt = 0;
uint64 byte;
while (true) {
bh_assert(bcnt + 1 <= (maxbits + 6) / 7);
CHECK_BUF(buf, buf_end, offset + 1);
byte = buf[offset];
offset += 1;
result |= ((byte & 0x7f) << shift);
shift += 7;
bcnt += 1;
if ((byte & 0x80) == 0) {
break;
}
}
if (!sign && maxbits == 32 && shift >= maxbits) {
/* The top bits set represent values > 32 bits */
bh_assert(!(((uint8)byte) & 0xf0));
}
else if (sign && maxbits == 32) {
if (shift < maxbits) {
/* Sign extend, second-highest bit is the sign bit */
if ((uint8)byte & 0x40)
result |= (~((uint64)0)) << shift;
}
else {
/* The top bits should be a sign-extension of the sign bit */
bool sign_bit_set = ((uint8)byte) & 0x8;
int top_bits = ((uint8)byte) & 0xf0;
bh_assert(!((sign_bit_set && top_bits != 0x70)
|| (!sign_bit_set && top_bits != 0)));
(void)top_bits;
(void)sign_bit_set;
}
}
else if (sign && maxbits == 64) {
if (shift < maxbits) {
/* Sign extend, second-highest bit is the sign bit */
if ((uint8)byte & 0x40)
result |= (~((uint64)0)) << shift;
}
else {
/* The top bits should be a sign-extension of the sign bit */
bool sign_bit_set = ((uint8)byte) & 0x1;
int top_bits = ((uint8)byte) & 0xfe;
bh_assert(!((sign_bit_set && top_bits != 0x7e)
|| (!sign_bit_set && top_bits != 0)));
(void)top_bits;
(void)sign_bit_set;
}
}
*p_buf += offset;
*p_result = result;
}
#define read_uint8(p) TEMPLATE_READ_VALUE(uint8, p)
#define read_uint32(p) TEMPLATE_READ_VALUE(uint32, p)
#define read_bool(p) TEMPLATE_READ_VALUE(bool, p)

View File

@ -0,0 +1,77 @@
/*
* Copyright (C) 2019 Intel Corporation. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#include "bh_leb128.h"
bh_leb_read_status_t
bh_leb_read(const uint8 *buf, const uint8 *buf_end, uint32 maxbits, bool sign,
uint64 *p_result, size_t *p_offset)
{
uint64 result = 0;
uint32 shift = 0;
uint32 offset = 0, bcnt = 0;
uint64 byte;
while (true) {
/* uN or SN must not exceed ceil(N/7) bytes */
if (bcnt + 1 > (maxbits + 6) / 7) {
return BH_LEB_READ_TOO_LONG;
}
if ((uintptr_t)buf + offset + 1 < (uintptr_t)buf
|| (uintptr_t)buf + offset + 1 > (uintptr_t)buf_end) {
return BH_LEB_READ_UNEXPECTED_END;
}
byte = buf[offset];
offset += 1;
result |= ((byte & 0x7f) << shift);
shift += 7;
bcnt += 1;
if ((byte & 0x80) == 0) {
break;
}
}
if (!sign && maxbits == 32 && shift >= maxbits) {
/* The top bits set represent values > 32 bits */
if (((uint8)byte) & 0xf0)
return BH_LEB_READ_OVERFLOW;
}
else if (sign && maxbits == 32) {
if (shift < maxbits) {
/* Sign extend, second-highest bit is the sign bit */
if ((uint8)byte & 0x40)
result |= (~((uint64)0)) << shift;
}
else {
/* The top bits should be a sign-extension of the sign bit */
bool sign_bit_set = ((uint8)byte) & 0x8;
int top_bits = ((uint8)byte) & 0xf0;
if ((sign_bit_set && top_bits != 0x70)
|| (!sign_bit_set && top_bits != 0))
return BH_LEB_READ_OVERFLOW;
}
}
else if (sign && maxbits == 64) {
if (shift < maxbits) {
/* Sign extend, second-highest bit is the sign bit */
if ((uint8)byte & 0x40)
result |= (~((uint64)0)) << shift;
}
else {
/* The top bits should be a sign-extension of the sign bit */
bool sign_bit_set = ((uint8)byte) & 0x1;
int top_bits = ((uint8)byte) & 0xfe;
if ((sign_bit_set && top_bits != 0x7e)
|| (!sign_bit_set && top_bits != 0))
return BH_LEB_READ_OVERFLOW;
}
}
*p_offset = offset;
*p_result = result;
return BH_LEB_READ_SUCCESS;
}

View File

@ -0,0 +1,30 @@
/*
* Copyright (C) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#ifndef _BH_LEB128_H
#define _BH_LEB128_H
#include "bh_platform.h"
typedef enum {
BH_LEB_READ_SUCCESS,
BH_LEB_READ_TOO_LONG,
BH_LEB_READ_OVERFLOW,
BH_LEB_READ_UNEXPECTED_END,
} bh_leb_read_status_t;
#ifdef __cplusplus
extern "C" {
#endif
bh_leb_read_status_t
bh_leb_read(const uint8 *buf, const uint8 *buf_end, uint32 maxbits, bool sign,
uint64 *p_result, size_t *p_offset);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -102,6 +102,7 @@ $(NAME)_SOURCES := ${SHARED_ROOT}/platform/alios/alios_platform.c \
${SHARED_ROOT}/utils/bh_common.c \
${SHARED_ROOT}/utils/bh_hashmap.c \
${SHARED_ROOT}/utils/bh_list.c \
${SHARED_ROOT}/utils/bh_leb128.c \
${SHARED_ROOT}/utils/bh_log.c \
${SHARED_ROOT}/utils/bh_queue.c \
${SHARED_ROOT}/utils/bh_vector.c \

View File

@ -441,6 +441,7 @@ CSRCS += nuttx_platform.c \
bh_common.c \
bh_hashmap.c \
bh_list.c \
bh_leb128.c \
bh_log.c \
bh_queue.c \
bh_vector.c \

View File

@ -0,0 +1,62 @@
/*
* Copyright (C) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#include "bh_leb128.h"
#include "gtest/gtest.h"
#include <vector>
#include <type_traits>
template<typename T>
void
run_read_leb_test(std::vector<uint8_t> data,
bh_leb_read_status_t expected_status, T expected_value)
{
size_t offset = 0;
uint64 value;
bh_leb_read_status_t status =
bh_leb_read(data.data(), data.data() + data.size(), sizeof(T) * 8,
std::is_signed<T>::value, &value, &offset);
ASSERT_EQ(expected_status, status);
if (status == BH_LEB_READ_SUCCESS) {
ASSERT_EQ(data.size(), offset);
ASSERT_EQ(expected_value, (T)value);
}
}
TEST(bh_leb128_test_suite, read_leb_u32)
{
run_read_leb_test<uint32>({ 0 }, BH_LEB_READ_SUCCESS,
0); // min value
run_read_leb_test<uint32>({ 2 }, BH_LEB_READ_SUCCESS,
2); // single-byte value
run_read_leb_test<uint32>({ 127 }, BH_LEB_READ_SUCCESS,
127); // max single-byte value
run_read_leb_test<uint32>({ 128, 1 }, BH_LEB_READ_SUCCESS,
128); // min value with continuation bit
run_read_leb_test<uint32>({ 160, 138, 32 }, BH_LEB_READ_SUCCESS,
525600); // arbitrary value
run_read_leb_test<uint32>({ 255, 255, 255, 255, 15 }, BH_LEB_READ_SUCCESS,
UINT32_MAX); // max value
run_read_leb_test<uint32>({ 255, 255, 255, 255, 16 }, BH_LEB_READ_OVERFLOW,
UINT32_MAX); // overflow
run_read_leb_test<uint32>({ 255, 255, 255, 255, 128 }, BH_LEB_READ_TOO_LONG,
0);
run_read_leb_test<uint32>({ 128 }, BH_LEB_READ_UNEXPECTED_END, 0);
}
TEST(bh_leb128_test_suite, read_leb_i64)
{
run_read_leb_test<int64>({ 184, 188, 195, 159, 237, 209, 128, 2 },
BH_LEB_READ_SUCCESS,
1128712371232312); // arbitrary value
run_read_leb_test<int64>(
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 127 },
BH_LEB_READ_SUCCESS,
(uint64)INT64_MIN); // min value
run_read_leb_test<int64>({ 255, 255, 255, 255, 255, 255, 255, 255, 255, 0 },
BH_LEB_READ_SUCCESS,
INT64_MAX); // max value
}