From 53044a92761b596988ed36afd6ca094f4ed78bc7 Mon Sep 17 00:00:00 2001 From: pete Date: Fri, 26 Mar 2021 07:36:19 +0000 Subject: [PATCH 01/17] Check for constant blocks before decoding info --- Source/astcenc_entry.cpp | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/Source/astcenc_entry.cpp b/Source/astcenc_entry.cpp index f1e24cf13..2cd4366c2 100644 --- a/Source/astcenc_entry.cpp +++ b/Source/astcenc_entry.cpp @@ -1028,15 +1028,6 @@ astcenc_error astcenc_get_block_info( // Fetch the appropriate partition and decimation tables block_size_descriptor& bsd = *ctx->bsd; - int partition_count = scb.partition_count; - const partition_info* pt = get_partition_table(&bsd, partition_count); - pt += scb.partition_index; - - const int packed_index = bsd.block_mode_packed_index[scb.block_mode]; - assert(packed_index >= 0 && packed_index < bsd.block_mode_count); - const block_mode& bm = bsd.block_modes[packed_index]; - const decimation_table& dt = *bsd.decimation_tables[bm.decimation_mode]; - // Start from a clean slate memset(info, 0, sizeof(*info)); @@ -1048,21 +1039,31 @@ astcenc_error astcenc_get_block_info( info->block_z = ctx->config.block_z; info->texel_count = bsd.texel_count; - // Check for error blocks first ... + // Check for error blocks first - block_mode will be negative info->is_error_block = scb.error_block != 0; if (info->is_error_block) { return ASTCENC_SUCCESS; } - // Check for constant color blocks second ... + // Check for constant color blocks second - block_mode will be negative info->is_constant_block = scb.block_mode < 0; if (info->is_constant_block) { return ASTCENC_SUCCESS; } - // Otherwise, handle a full block with partition payload ... 
+ // Otherwise, handle a full block with partition payload; values are known + // to be valid once the two conditions above have been checked + int partition_count = scb.partition_count; + const partition_info* pt = get_partition_table(&bsd, partition_count); + pt += scb.partition_index; + + const int packed_index = bsd.block_mode_packed_index[scb.block_mode]; + assert(packed_index >= 0 && packed_index < bsd.block_mode_count); + const block_mode& bm = bsd.block_modes[packed_index]; + const decimation_table& dt = *bsd.decimation_tables[bm.decimation_mode]; + info->weight_x = dt.weight_x; info->weight_y = dt.weight_y; info->weight_z = dt.weight_z; From 6bb9924a1fd751c499dc80a675c11bb2234ef84d Mon Sep 17 00:00:00 2001 From: Pete Harris Date: Fri, 26 Mar 2021 12:16:18 +0000 Subject: [PATCH 02/17] Update ChangeLog.md --- Docs/ChangeLog.md | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/Docs/ChangeLog.md b/Docs/ChangeLog.md index d13d55f91..72c469bde 100644 --- a/Docs/ChangeLog.md +++ b/Docs/ChangeLog.md @@ -9,12 +9,11 @@ clocked at 4.2 GHz, running astcenc using 6 threads. ## 2.5 -**Status:** In development, due late March 2021 +**Status:** Released, March 2021 -The 2.5 release is planned to be the last major release in the 2.x series. -After this release a `2.x` branch will be created to provide stable long-term -support, and the `main` branch will switch to focusing on more radical changes -for the 3.x series. +The 2.5 release is the last major release in the 2.x series. After this release +a `2.x` branch will provide stable long-term support, and the `main` branch +will switch to focusing on more radical changes for the 3.x series. Reminder for users of the library interface - the API is not designed to be stable across versions, and this release is not compatible with earlier 2.x @@ -42,14 +41,14 @@ releases. 
Please update and rebuild your client-side code using the updated * **API Change:** The decompression functionality in the core API has been changed to allow use of multiple threads. The design pattern matches the compression functionality, requiring the caller to create the threads, - synchronize them between images, and call the new - * **API Feature:** Defines to support exporting public API entry point + synchronize them between images, and to call the new + `astcenc_decompress_reset()` function between images. +* **API Feature:** Defines to support exporting public API entry point symbols from a shared object are provided, but not exposed off-the-shelf by the CMake provided by the project. - `astcenc_decompress_reset()` function between images. * **API Feature:** New `astcenc_get_block_info()` function added to the core API to allow users to perform high level analysis of compressed data. This - API is not implemented in decompressor-only builds. + API is not implemented in decompressor-only builds. * **API Feature:** Codec configuration structure has been extended to expose the new RGBM compression mode. See the API header for details. From 7889b7cbb02531c5ba3773ebe656f48e122a3c76 Mon Sep 17 00:00:00 2001 From: pete Date: Sun, 28 Mar 2021 14:32:15 +0100 Subject: [PATCH 03/17] Make platform_isa_detection use compiler config --- Source/astcenc_platform_isa_detection.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Source/astcenc_platform_isa_detection.cpp b/Source/astcenc_platform_isa_detection.cpp index 2751d2da2..afb15e443 100644 --- a/Source/astcenc_platform_isa_detection.cpp +++ b/Source/astcenc_platform_isa_detection.cpp @@ -15,17 +15,19 @@ // under the License. // ---------------------------------------------------------------------------- -#if (ASTCENC_SSE > 0) || (ASTCENC_AVX > 0) || \ - (ASTCENC_POPCNT > 0) || (ASTCENC_F16C > 0) - /** * @brief Platform-specific function implementations. 
* * This module contains functions for querying the host extended ISA support. */ +// Include before the defines below to pick up any auto-setup based on compiler +// built-in config, if not being set explicitly by the build system #include "astcenc_internal.h" +#if (ASTCENC_SSE > 0) || (ASTCENC_AVX > 0) || \ + (ASTCENC_POPCNT > 0) || (ASTCENC_F16C > 0) + static int g_cpu_has_sse41 = -1; static int g_cpu_has_avx2 = -1; static int g_cpu_has_popcnt = -1; From fc2fbe8c64228cfec83d9a99c7b0bb55a8ca982e Mon Sep 17 00:00:00 2001 From: pete Date: Sun, 4 Apr 2021 23:14:26 +0100 Subject: [PATCH 04/17] Use explicit loop tail rather than over-fetching --- .../astcenc_ideal_endpoints_and_weights.cpp | 41 +++++++++++-------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/Source/astcenc_ideal_endpoints_and_weights.cpp b/Source/astcenc_ideal_endpoints_and_weights.cpp index 6e1906cf6..dd7640e59 100644 --- a/Source/astcenc_ideal_endpoints_and_weights.cpp +++ b/Source/astcenc_ideal_endpoints_and_weights.cpp @@ -844,6 +844,7 @@ void compute_ideal_weights_for_decimation_table( float* RESTRICT weight_set, float* RESTRICT weights ) { + int i; int texel_count = dt.texel_count; int weight_count = dt.weight_count; @@ -859,7 +860,7 @@ void compute_ideal_weights_for_decimation_table( // weights into both the weight set and the output epw copy. if (texel_count == weight_count) { - for (int i = 0; i < texel_count; i++) + for (i = 0; i < texel_count; i++) { assert(i == dt.weight_texel[0][i]); weight_set[i] = eai_in.weights[i]; @@ -874,7 +875,7 @@ void compute_ideal_weights_for_decimation_table( // epw copy and then do the full algorithm to decimate weights. 
else { - for (int i = 0; i < texel_count; i++) + for (i = 0; i < texel_count; i++) { eai_out.weights[i] = eai_in.weights[i]; eai_out.weight_error_scale[i] = eai_in.weight_error_scale[i]; @@ -885,9 +886,11 @@ void compute_ideal_weights_for_decimation_table( alignas(ASTCENC_VECALIGN) float infilled_weights[MAX_TEXELS_PER_BLOCK]; // Compute an initial average for each decimated weight + i = 0; + #if ASTCENC_SIMD_WIDTH >= 8 - int clipped_weight_count = round_up_to_simd_multiple_vla(weight_count); - for (int i = 0; i < clipped_weight_count; i += ASTCENC_SIMD_WIDTH) + int clipped_weight_count = round_down_to_simd_multiple_vla(weight_count); + for (/* */; i < clipped_weight_count; i += ASTCENC_SIMD_WIDTH) { // Start with a small value to avoid div-by-zero later vfloat weight_weight(1e-10f); @@ -918,8 +921,10 @@ void compute_ideal_weights_for_decimation_table( storea(weight_weight, weights + i); storea(initial_weight / weight_weight, weight_set + i); } -#else - for (int i = 0; i < weight_count; i++) +#endif + + // Loop tail + for (/* */; i < weight_count; i++) { // Start with a small value to avoid div-by-zero later float weight_weight = 1e-10f; @@ -941,13 +946,14 @@ void compute_ideal_weights_for_decimation_table( weights[i] = weight_weight; weight_set[i] = initial_weight / weight_weight; } -#endif // Populate the interpolated weight grid based on the initital average + i = 0; + #if ASTCENC_SIMD_WIDTH >= 8 // Process SIMD-width texel coordinates at at time while we can - int clipped_texel_count = round_up_to_simd_multiple_vla(texel_count); - for (int i = 0; i < clipped_texel_count; i += ASTCENC_SIMD_WIDTH) + int clipped_texel_count = round_down_to_simd_multiple_vla(texel_count); + for (/* */; i < clipped_texel_count; i += ASTCENC_SIMD_WIDTH) { vint texel_weights_0(dt.texel_weights_4t[0] + i); vint texel_weights_1(dt.texel_weights_4t[1] + i); @@ -971,8 +977,10 @@ void compute_ideal_weights_for_decimation_table( storea(weight, infilled_weights + i); } -#else - for (int i = 
0; i < texel_count; i++) +#endif + + // Loop tail + for (/* */; i < texel_count; i++) { const uint8_t *texel_weights = dt.texel_weights_t4[i]; const float *texel_weights_float = dt.texel_weights_float_t4[i]; @@ -981,14 +989,14 @@ void compute_ideal_weights_for_decimation_table( + (weight_set[texel_weights[2]] * texel_weights_float[2] + weight_set[texel_weights[3]] * texel_weights_float[3]); } -#endif // Perform a single iteration of refinement constexpr float stepsize = 0.25f; constexpr float chd_scale = -TEXEL_WEIGHT_SUM; + i = 0; #if ASTCENC_SIMD_WIDTH >= 8 - for (int i = 0; i < clipped_weight_count; i += ASTCENC_SIMD_WIDTH) + for (/* */; i < clipped_weight_count; i += ASTCENC_SIMD_WIDTH) { // Start with a small value to avoid div-by-zero later vfloat weight_val = loada(weight_set + i); @@ -1027,8 +1035,10 @@ void compute_ideal_weights_for_decimation_table( // update the weight storea(weight_val + step, weight_set + i); } -#else - for (int i = 0; i < weight_count; i++) +#endif + + // Loop tail + for (/* */; i < weight_count; i++) { float weight_val = weight_set[i]; @@ -1058,7 +1068,6 @@ void compute_ideal_weights_for_decimation_table( // update the weight weight_set[i] = weight_val + step; } -#endif } /* From 245e890700208a046fc7b5b316fd6c9e080b7adf Mon Sep 17 00:00:00 2001 From: pete Date: Tue, 13 Apr 2021 23:37:15 +0100 Subject: [PATCH 05/17] Fix comment in NEON code --- Source/astcenc_vecmathlib_neon_4.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Source/astcenc_vecmathlib_neon_4.h b/Source/astcenc_vecmathlib_neon_4.h index a1163531f..e75b070eb 100755 --- a/Source/astcenc_vecmathlib_neon_4.h +++ b/Source/astcenc_vecmathlib_neon_4.h @@ -327,7 +327,9 @@ struct vmask4 } /** - * @brief Construct from an existing SIMD register. + * @brief Construct from 4 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. 
*/ ASTCENC_SIMD_INLINE explicit vmask4(bool a, bool b, bool c, bool d) { From b4275a5f08e0269a561b73e493a38a878b288479 Mon Sep 17 00:00:00 2001 From: pete Date: Tue, 13 Apr 2021 23:37:30 +0100 Subject: [PATCH 06/17] Exclude printf unless in debug build --- Source/astcenc_weight_align.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/astcenc_weight_align.cpp b/Source/astcenc_weight_align.cpp index 97da89d12..8f3843118 100644 --- a/Source/astcenc_weight_align.cpp +++ b/Source/astcenc_weight_align.cpp @@ -325,7 +325,7 @@ static void compute_angular_endpoints_for_quant_levels( // Did we find anything? // TODO: Can we do better than bsi = 0 here. We should at least // propagate an error (and move the printf into the CLI). -#if defined(NDEBUG) +#if !defined(NDEBUG) if (bsi < 0) { printf("WARNING: Unable to find encoding within specified error limit\n"); From 13f12fde7c8b072162db456f3a1a4455fe97fd63 Mon Sep 17 00:00:00 2001 From: pete Date: Wed, 19 May 2021 08:33:03 +0100 Subject: [PATCH 07/17] Fix documentation of astcenc_context_alloc Decompression does NOT require single threaded use. --- Source/astcenc.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Source/astcenc.h b/Source/astcenc.h index 618ded497..4957b295b 100644 --- a/Source/astcenc.h +++ b/Source/astcenc.h @@ -686,11 +686,10 @@ ASTCENC_PUBLIC astcenc_error astcenc_config_init( * to serially compress or decompress multiple images to amortize setup cost. * * Contexts can be allocated to support only decompression by setting the - * ASTCENC_FLG_DECOMPRESS_ONLY flag when creating the configuration. These - * contexts must be allocated with a thread count of 1 (decompression is always - * single threaded), and the compression functions will fail if invoked. For - * a decompress-only library build the ASTCENC_FLG_DECOMPRESS_ONLY flag must - * be set when creating ay context. + * @c ASTCENC_FLG_DECOMPRESS_ONLY flag when creating the configuration. 
The + * compression functions will fail if invoked. For a decompress-only library + * build the @c ASTCENC_FLG_DECOMPRESS_ONLY flag must be set when creating any + * context. * * @param[in] config Codec config. * @param thread_count Thread count to configure for. From 60e2b2ed4d5a4330bd0e8f5995497dcdee9ea174 Mon Sep 17 00:00:00 2001 From: pete Date: Tue, 25 May 2021 08:02:46 +0100 Subject: [PATCH 08/17] Remove unused array from error_weight_block --- Source/astcenc_compress_symbolic.cpp | 1 - Source/astcenc_internal.h | 1 - 2 files changed, 2 deletions(-) diff --git a/Source/astcenc_compress_symbolic.cpp b/Source/astcenc_compress_symbolic.cpp index 5b5e55195..5ff72c377 100644 --- a/Source/astcenc_compress_symbolic.cpp +++ b/Source/astcenc_compress_symbolic.cpp @@ -1150,7 +1150,6 @@ static float prepare_error_weight_block( ewb->texel_weight_rg[i] = (wr + wg) * 0.5f; ewb->texel_weight_rb[i] = (wr + wb) * 0.5f; ewb->texel_weight_gb[i] = (wg + wb) * 0.5f; - ewb->texel_weight_ra[i] = (wr + wa) * 0.5f; ewb->texel_weight_gba[i] = (wg + wb + wa) * 0.333333f; ewb->texel_weight_rba[i] = (wr + wb + wa) * 0.333333f; diff --git a/Source/astcenc_internal.h b/Source/astcenc_internal.h index 565bec008..04e6c6186 100644 --- a/Source/astcenc_internal.h +++ b/Source/astcenc_internal.h @@ -617,7 +617,6 @@ struct error_weight_block float texel_weight_rg[MAX_TEXELS_PER_BLOCK]; float texel_weight_rb[MAX_TEXELS_PER_BLOCK]; float texel_weight_gb[MAX_TEXELS_PER_BLOCK]; - float texel_weight_ra[MAX_TEXELS_PER_BLOCK]; float texel_weight_r[MAX_TEXELS_PER_BLOCK]; float texel_weight_g[MAX_TEXELS_PER_BLOCK]; From 378fce5c850f6e7b1477666e784aad9b9d6e7c27 Mon Sep 17 00:00:00 2001 From: pete Date: Wed, 9 Jun 2021 09:29:07 +0100 Subject: [PATCH 09/17] Workaround MSVC 2019 codegen bug --- Source/astcenc_vecmathlib.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Source/astcenc_vecmathlib.h b/Source/astcenc_vecmathlib.h index aed6752c3..e4e847eb1 100644 --- a/Source/astcenc_vecmathlib.h +++ 
b/Source/astcenc_vecmathlib.h @@ -408,7 +408,15 @@ ASTCENC_SIMD_INLINE vint4 unorm16_to_sf16(vint4 p) vmask4 is_one = p == vint4(0xFFFF); vmask4 is_small = p < vint4(4); +// Manually inline clz() on Visual Studio to avoid release build codegen bug +#if !defined(__clang__) && defined(_MSC_VER) + vint a = (~lsr<8>(p)) & p; + a = float_as_int(int_to_float(a)); + a = vint4(127 + 31) - lsr<23>(a); + vint4 lz = clamp(0, 32, a) - 16; +#else vint4 lz = clz(p) - 16; +#endif // TODO: Could use AVX2 _mm_sllv_epi32() instead of p * 2^ p = p * two_to_the_n(lz + 1); From 3c188268a30a8b6d259c8ba47c8c49c003d1a9ae Mon Sep 17 00:00:00 2001 From: pete Date: Wed, 9 Jun 2021 11:38:24 +0100 Subject: [PATCH 10/17] Fix MSVC AVX2 builds --- Source/astcenc_vecmathlib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/astcenc_vecmathlib.h b/Source/astcenc_vecmathlib.h index e4e847eb1..aefdc2b90 100644 --- a/Source/astcenc_vecmathlib.h +++ b/Source/astcenc_vecmathlib.h @@ -410,7 +410,7 @@ ASTCENC_SIMD_INLINE vint4 unorm16_to_sf16(vint4 p) // Manually inline clz() on Visual Studio to avoid release build codegen bug #if !defined(__clang__) && defined(_MSC_VER) - vint a = (~lsr<8>(p)) & p; + vint4 a = (~lsr<8>(p)) & p; a = float_as_int(int_to_float(a)); a = vint4(127 + 31) - lsr<23>(a); vint4 lz = clamp(0, 32, a) - 16; From 1d1348468cb6eeb2c065ff97905c7b4cd034586a Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Wed, 14 Jul 2021 22:27:09 +0100 Subject: [PATCH 11/17] Fix arm64 invariance in NONE builds --- Source/UnitTest/test_simd.cpp | 4 ++-- Source/astcenc_vecmathlib_none_4.h | 10 ++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/Source/UnitTest/test_simd.cpp b/Source/UnitTest/test_simd.cpp index ba077ff02..daa7c348e 100644 --- a/Source/UnitTest/test_simd.cpp +++ b/Source/UnitTest/test_simd.cpp @@ -1061,11 +1061,11 @@ TEST(vfloat4, normalize_safe) /** @brief Test vfloat4 float_to_int. 
*/ TEST(vfloat4, float_to_int) { - vfloat4 a(1.1f, 1.5f, 1.6f, 4.0f); + vfloat4 a(1.1f, 1.5f, -1.6f, 4.0f); vint4 r = float_to_int(a); EXPECT_EQ(r.lane<0>(), 1); EXPECT_EQ(r.lane<1>(), 1); - EXPECT_EQ(r.lane<2>(), 1); + EXPECT_EQ(r.lane<2>(), -1); EXPECT_EQ(r.lane<3>(), 4); } diff --git a/Source/astcenc_vecmathlib_none_4.h b/Source/astcenc_vecmathlib_none_4.h index 716d69824..7f7b833b5 100644 --- a/Source/astcenc_vecmathlib_none_4.h +++ b/Source/astcenc_vecmathlib_none_4.h @@ -924,12 +924,10 @@ ASTCENC_SIMD_INLINE void storea(vfloat4 a, float* ptr) */ ASTCENC_SIMD_INLINE vint4 float_to_int(vfloat4 a) { - // Casting to unsigned buys us an extra bit of precision in cases where - // we can use the integer as nasty bit hacks. - return vint4((unsigned int)a.m[0], - (unsigned int)a.m[1], - (unsigned int)a.m[2], - (unsigned int)a.m[3]); + return vint4((int)a.m[0], + (int)a.m[1], + (int)a.m[2], + (int)a.m[3]); } /**f From a93bacfe28668930f52371e9eabe759f3f652d35 Mon Sep 17 00:00:00 2001 From: pete Date: Fri, 20 Aug 2021 07:41:54 +0100 Subject: [PATCH 12/17] Use a ROM quant_mode_table to avoid parallel init --- Source/astcenc_entry.cpp | 1 - Source/astcenc_internal.h | 4 +- Source/astcenc_quantization.cpp | 212 ++++++++++++++++++++++++++------ 3 files changed, 173 insertions(+), 44 deletions(-) diff --git a/Source/astcenc_entry.cpp b/Source/astcenc_entry.cpp index 2cd4366c2..b775756ea 100644 --- a/Source/astcenc_entry.cpp +++ b/Source/astcenc_entry.cpp @@ -660,7 +660,6 @@ astcenc_error astcenc_context_alloc( #if !defined(ASTCENC_DECOMPRESS_ONLY) prepare_angular_tables(); #endif - build_quant_mode_table(); return ASTCENC_SUCCESS; } diff --git a/Source/astcenc_internal.h b/Source/astcenc_internal.h index 04e6c6186..006b764fd 100644 --- a/Source/astcenc_internal.h +++ b/Source/astcenc_internal.h @@ -853,7 +853,7 @@ int is_legal_3d_block_size( extern const uint8_t color_quant_tables[21][256]; extern const uint8_t color_unquant_tables[21][256]; -extern int8_t 
quant_mode_table[17][128]; +extern const int8_t quant_mode_table[17][128]; void encode_ise( int quant_level, @@ -883,8 +883,6 @@ int get_ise_sequence_bitcount( int items, quant_method quant); -void build_quant_mode_table(void); - // ********************************************** // functions and data pertaining to partitioning // ********************************************** diff --git a/Source/astcenc_quantization.cpp b/Source/astcenc_quantization.cpp index afc101601..3c1453d56 100644 --- a/Source/astcenc_quantization.cpp +++ b/Source/astcenc_quantization.cpp @@ -536,43 +536,175 @@ const uint8_t color_unquant_tables[21][256] = { // The quant_mode_table[integercount/2][bits] gives us the quantization // level for a given integer count and number of bits that the integer may fit // into. This is needed for color decoding, and for the color encoding. -int8_t quant_mode_table[17][128]; - -void build_quant_mode_table() -{ - for (int i = 0; i <= 16; i++) - { - for (int j = 0; j < 128; j++) - { - quant_mode_table[i][j] = -1; - } - } - - for (int i = 0; i < 21; i++) - { - for (int j = 1; j <= 16; j++) - { - int p = get_ise_sequence_bitcount(2 * j, (quant_method)i); - if (p < 128) - { - quant_mode_table[j][p] = i; - } - } - } - - for (int i = 0; i <= 16; i++) - { - int largest_value_so_far = -1; - for (int j = 0; j < 128; j++) - { - if (quant_mode_table[i][j] > largest_value_so_far) - { - largest_value_so_far = quant_mode_table[i][j]; - } - else - { - quant_mode_table[i][j] = largest_value_so_far; - } - } - } -} +const int8_t quant_mode_table[17][128] { + { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 + }, + { + -1, -1, 0, 0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 + }, + { + -1, -1, -1, -1, 0, 0, 0, 1, 2, 2, 3, 4, 5, 5, 6, 7, + 8, 8, 9, 10, 11, 11, 12, 13, 14, 14, 15, 16, 17, 17, 18, 19, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 + }, + { + -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, + 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 + }, + { + -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 1, 1, 1, + 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, + 8, 8, 8, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, + 14, 14, 14, 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 19, 19, 19, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 + }, + { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, + 5, 5, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 10, 10, + 10, 10, 11, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, + 15, 15, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 19, 19, 19, 19, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 + }, + { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, + 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, + 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 + }, + { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, + 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, + 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, + 9, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 12, 12, 12, 12, 13, + 13, 13, 13, 13, 14, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, + 16, 16, 17, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 19, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 + }, + { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 
+ 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, + 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19 + }, + { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, + 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, + 6, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, + 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, + 14, 14, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 17, 17 + }, + { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, + 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, + 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 10, 10, 10, 10, + 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 15 + }, + { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, + 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, + 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, + 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, + 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13 + }, + { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11 + }, + { + -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, + 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10 + }, + { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, + 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9 + }, + { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8 + }, + { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 + } +}; From 9828f4f96196e7eb39ed5d3afebd2c92fa22e843 Mon Sep 17 00:00:00 2001 From: Pete Harris Date: Sat, 21 Aug 2021 16:25:56 +0100 Subject: [PATCH 13/17] Reject invalid encodings during decode --- Source/astcenc_symbolic_physical.cpp | 9 ++++++++- 1 file changed, 8 
insertions(+), 1 deletion(-) diff --git a/Source/astcenc_symbolic_physical.cpp b/Source/astcenc_symbolic_physical.cpp index 894dc9337..45d0d09b7 100644 --- a/Source/astcenc_symbolic_physical.cpp +++ b/Source/astcenc_symbolic_physical.cpp @@ -289,6 +289,7 @@ void physical_to_symbolic( if (rsvbits != 3) { scb.error_block = 1; + return; } int vx_low_s = read_bits(8, 12, pcb.data) | (read_bits(5, 12 + 8, pcb.data) << 8); @@ -301,6 +302,7 @@ void physical_to_symbolic( if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t) && !all_ones) { scb.error_block = 1; + return; } } else @@ -318,6 +320,7 @@ void physical_to_symbolic( if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t || vx_low_p >= vx_high_p) && !all_ones) { scb.error_block = 1; + return; } } @@ -372,6 +375,7 @@ void physical_to_symbolic( if (is_dual_plane && partition_count == 4) { scb.error_block = 1; + return; } scb.color_formats_matched = 0; @@ -437,6 +441,7 @@ void physical_to_symbolic( if (color_integer_count > 18) { scb.error_block = 1; + return; } // then, determine the color endpoint format to use for these integers @@ -453,12 +458,14 @@ void physical_to_symbolic( } int color_quant_level = quant_mode_table[color_integer_count >> 1][color_bits]; - scb.color_quant_level = color_quant_level; if (color_quant_level < 4) { scb.error_block = 1; + return; } + scb.color_quant_level = color_quant_level; + // then unpack the integer-bits uint8_t values_to_decode[32]; decode_ise(color_quant_level, color_integer_count, pcb.data, values_to_decode, (partition_count == 1 ? 
17 : 19 + PARTITION_BITS)); From 147bd91bd54321d6fd7e7ff17b10475c8788944f Mon Sep 17 00:00:00 2001 From: pete Date: Fri, 22 Oct 2021 16:11:34 +0100 Subject: [PATCH 14/17] Add ENABLE_ASAN option to CMake --- Source/cmake_core.cmake | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Source/cmake_core.cmake b/Source/cmake_core.cmake index 4d58d5366..6fbfa8f35 100644 --- a/Source/cmake_core.cmake +++ b/Source/cmake_core.cmake @@ -119,9 +119,18 @@ macro(astcenc_set_properties NAME) $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wno-implicit-int-conversion> $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wno-shift-sign-overflow> $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wno-format-nonliteral> - $<$<CXX_COMPILER_ID:Clang>:-Wdocumentation>) + if(${ENABLE_ASAN}) + target_compile_options(${NAME} + PRIVATE + $<$<CXX_COMPILER_ID:Clang>:-fsanitize=address>) + + target_link_options(${NAME} + PRIVATE + $<$<CXX_COMPILER_ID:Clang>:-fsanitize=address>) + endif() + target_link_options(${NAME} PRIVATE From 0a6ab35c734a3671cde80ff098fb9fc0eb16a3e3 Mon Sep 17 00:00:00 2001 From: pete Date: Fri, 22 Oct 2021 16:12:45 +0100 Subject: [PATCH 15/17] Improve handling of failed encodings --- Source/astcenc_compress_symbolic.cpp | 12 ++++++++++++ Source/astcenc_weight_align.cpp | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Source/astcenc_compress_symbolic.cpp b/Source/astcenc_compress_symbolic.cpp index 5ff72c377..210f54330 100644 --- a/Source/astcenc_compress_symbolic.cpp +++ b/Source/astcenc_compress_symbolic.cpp @@ -1563,6 +1563,18 @@ void compress_block( trace_add_data("exit", "quality not hit"); END_OF_TESTS: + + // If we still have error blocks then convert them to something we can encode + // TODO: Do something more sensible here, such as average color block + if (scb.error_block) + { + scb.error_block = 0; + scb.block_mode = -2; + vfloat4 color_f32 = clamp(0.0f, 1.0f, blk->origin_texel) * 65535.0f; + vint4 color_u16 = float_to_int_rtn(color_f32); + store(color_u16, scb.constant_color); + } + // Compress to a physical block symbolic_to_physical(*bsd, scb, pcb); } diff --git a/Source/astcenc_weight_align.cpp 
b/Source/astcenc_weight_align.cpp index 8f3843118..a0a6a26cb 100644 --- a/Source/astcenc_weight_align.cpp +++ b/Source/astcenc_weight_align.cpp @@ -331,7 +331,7 @@ static void compute_angular_endpoints_for_quant_levels( printf("WARNING: Unable to find encoding within specified error limit\n"); bsi = 0; } -else +#else bsi = astc::max(0, bsi); #endif From 2fff47c230e3c4a2bc710ef48797b5e2f6a45126 Mon Sep 17 00:00:00 2001 From: pete Date: Wed, 23 Feb 2022 14:26:06 +0000 Subject: [PATCH 16/17] Fix image component count identification for KTX output --- Source/astcenccli_image.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/astcenccli_image.cpp b/Source/astcenccli_image.cpp index 92c4a3cfe..6c6a9beb0 100644 --- a/Source/astcenccli_image.cpp +++ b/Source/astcenccli_image.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2021 Arm Limited +// Copyright 2011-2022 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -168,7 +168,7 @@ int determine_image_channels(const astcenc_image * img) } } - int image_channels = 1 + (is_luma == 0 ? 0 : 2) + (has_alpha ? 0 : 1); + int image_channels = 1 + (is_luma == 0 ? 2 : 0) + (has_alpha ? 
1 : 0); return image_channels; } From 63e71920afc546659d18828935963844ec56f8f0 Mon Sep 17 00:00:00 2001 From: Pete Harris Date: Sat, 13 Aug 2022 14:44:49 +0100 Subject: [PATCH 17/17] Fix fp16/32 KTX loading --- Source/GoogleTest | 2 +- Source/astcenccli_image_load_store.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/GoogleTest b/Source/GoogleTest index 703bd9caa..e2239ee60 160000 --- a/Source/GoogleTest +++ b/Source/GoogleTest @@ -1 +1 @@ -Subproject commit 703bd9caab50b139428cea1aaff9974ebee5742e +Subproject commit e2239ee6043f73722e7aa812a459f54a28552929 diff --git a/Source/astcenccli_image_load_store.cpp b/Source/astcenccli_image_load_store.cpp index 09341ca60..ecc936449 100644 --- a/Source/astcenccli_image_load_store.cpp +++ b/Source/astcenccli_image_load_store.cpp @@ -916,7 +916,7 @@ static astcenc_image* load_ktx_uncompressed_image( } case GL_FLOAT: { - bitness = 32; + bitness = 16; bytes_per_component = 4; switch (hdr.gl_format) { @@ -1048,7 +1048,7 @@ static astcenc_image* load_ktx_uncompressed_image( } delete[] buf; - is_hdr = bitness == 32; + is_hdr = bitness == 16; component_count = components; return astc_img; }