diff --git a/chromium-78.0.3904.70-gcc9-drop-rsp-clobber.patch b/chromium-78.0.3904.70-gcc9-drop-rsp-clobber.patch deleted file mode 100644 index 6785b09..0000000 --- a/chromium-78.0.3904.70-gcc9-drop-rsp-clobber.patch +++ /dev/null @@ -1,24 +0,0 @@ -diff -up chromium-78.0.3904.70/third_party/tcmalloc/chromium/src/base/linux_syscall_support.h.gcc9 chromium-78.0.3904.70/third_party/tcmalloc/chromium/src/base/linux_syscall_support.h ---- chromium-78.0.3904.70/third_party/tcmalloc/chromium/src/base/linux_syscall_support.h.gcc9 2019-10-23 08:58:16.153251961 -0400 -+++ chromium-78.0.3904.70/third_party/tcmalloc/chromium/src/base/linux_syscall_support.h 2019-10-23 08:58:36.896862347 -0400 -@@ -1486,7 +1486,7 @@ struct kernel_stat { - "d"(LSS_SYSCALL_ARG(parent_tidptr)), - "r"(LSS_SYSCALL_ARG(newtls)), - "r"(LSS_SYSCALL_ARG(child_tidptr)) -- : "rsp", "memory", "r8", "r10", "r11", "rcx"); -+ : "memory", "r8", "r10", "r11", "rcx"); - } - LSS_RETURN(int, __res); - } -diff -up chromium-78.0.3904.70/third_party/tcmalloc/vendor/src/base/linux_syscall_support.h.gcc9 chromium-78.0.3904.70/third_party/tcmalloc/vendor/src/base/linux_syscall_support.h ---- chromium-78.0.3904.70/third_party/tcmalloc/vendor/src/base/linux_syscall_support.h.gcc9 2019-10-23 08:58:59.623435488 -0400 -+++ chromium-78.0.3904.70/third_party/tcmalloc/vendor/src/base/linux_syscall_support.h 2019-10-23 08:59:16.113125772 -0400 -@@ -1485,7 +1485,7 @@ struct kernel_stat { - "d"(LSS_SYSCALL_ARG(parent_tidptr)), - "r"(LSS_SYSCALL_ARG(newtls)), - "r"(LSS_SYSCALL_ARG(child_tidptr)) -- : "rsp", "memory", "r8", "r10", "r11", "rcx"); -+ : "memory", "r8", "r10", "r11", "rcx"); - } - LSS_RETURN(int, __res); - } diff --git a/chromium-90.0.4430.72-fstatfix.patch b/chromium-90.0.4430.72-fstatfix.patch deleted file mode 100644 index 6d3f71b..0000000 --- a/chromium-90.0.4430.72-fstatfix.patch +++ /dev/null @@ -1,141 +0,0 @@ -diff -up chromium-90.0.4430.72/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc.fstatfix chromium-90.0.4430.72/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc ---- chromium-90.0.4430.72/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc.fstatfix 2021-04-16 15:35:53.869542483 -0400 -+++ chromium-90.0.4430.72/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc 2021-04-16 15:38:19.754688717 -0400 -@@ -269,6 +269,18 @@ ResultExpr EvaluateSyscallImpl(int fs_de - return Allow(); - } - -+#if defined(__NR_newfstatat) -+ if (sysno == __NR_newfstatat) { -+ return RewriteFstatatSIGSYS(); -+ } -+#endif -+ -+#if defined(__NR_fstatat64) -+ if (sysno == __NR_fstatat64) { -+ return RewriteFstatatSIGSYS(); -+ } -+#endif -+ - if (SyscallSets::IsFileSystem(sysno) || - SyscallSets::IsCurrentDirectory(sysno)) { - return Error(fs_denied_errno); -diff -up chromium-90.0.4430.72/sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.cc.fstatfix chromium-90.0.4430.72/sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.cc ---- chromium-90.0.4430.72/sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.cc.fstatfix 2021-04-14 14:41:08.000000000 -0400 -+++ chromium-90.0.4430.72/sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.cc 2021-04-16 15:35:53.869542483 -0400 -@@ -6,6 +6,8 @@ - - #include "sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h" - -+#include -+#include - #include - #include - #include -@@ -355,6 +357,35 @@ intptr_t SIGSYSSchedHandler(const struct - return -ENOSYS; - } - -+intptr_t SIGSYSFstatatHandler(const struct arch_seccomp_data& args, -+ void* aux) { -+ switch (args.nr) { -+#if defined(__NR_newfstatat) -+ case __NR_newfstatat: -+#endif -+#if defined(__NR_fstatat64) -+ case __NR_fstatat64: -+#endif -+#if defined(__NR_newfstatat) || defined(__NR_fstatat64) -+ if (*reinterpret_cast(args.args[1]) == '\0' -+ && args.args[3] == static_cast(AT_EMPTY_PATH)) { -+ return sandbox::sys_fstat64(static_cast(args.args[0]), -+ reinterpret_cast(args.args[2])); -+ } else { -+ errno = EACCES; -+ return -1; -+ } -+ break; -+#endif -+ } -+ -+ CrashSIGSYS_Handler(args, aux); -+ -+ // Should never be reached. -+ RAW_CHECK(false); -+ return -ENOSYS; -+} -+ - bpf_dsl::ResultExpr CrashSIGSYS() { - return bpf_dsl::Trap(CrashSIGSYS_Handler, NULL); - } -@@ -387,6 +418,10 @@ bpf_dsl::ResultExpr RewriteSchedSIGSYS() - return bpf_dsl::Trap(SIGSYSSchedHandler, NULL); - } - -+bpf_dsl::ResultExpr RewriteFstatatSIGSYS() { -+ return bpf_dsl::Trap(SIGSYSFstatatHandler, NULL); -+} -+ - void AllocateCrashKeys() { - #if !defined(OS_NACL_NONSFI) - if (seccomp_crash_key) -diff -up chromium-90.0.4430.72/sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h.fstatfix chromium-90.0.4430.72/sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h ---- chromium-90.0.4430.72/sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h.fstatfix 2021-04-14 14:41:08.000000000 -0400 -+++ chromium-90.0.4430.72/sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h 2021-04-16 15:35:53.869542483 -0400 -@@ -62,6 +62,10 @@ SANDBOX_EXPORT intptr_t SIGSYSPtraceFail - // sched_setparam(), sched_setscheduler() - SANDBOX_EXPORT intptr_t SIGSYSSchedHandler(const arch_seccomp_data& args, - void* aux); -+// If the fstatat syscall is actually a disguised fstat, calls the regular fstat -+// syscall, otherwise, crashes in the same way as CrashSIGSYS_Handler. -+SANDBOX_EXPORT intptr_t SIGSYSFstatatHandler(const struct arch_seccomp_data& args, -+ void* aux); - - // Variants of the above functions for use with bpf_dsl. - SANDBOX_EXPORT bpf_dsl::ResultExpr CrashSIGSYS(); -@@ -72,6 +76,7 @@ SANDBOX_EXPORT bpf_dsl::ResultExpr Crash - SANDBOX_EXPORT bpf_dsl::ResultExpr CrashSIGSYSFutex(); - SANDBOX_EXPORT bpf_dsl::ResultExpr CrashSIGSYSPtrace(); - SANDBOX_EXPORT bpf_dsl::ResultExpr RewriteSchedSIGSYS(); -+SANDBOX_EXPORT bpf_dsl::ResultExpr RewriteFstatatSIGSYS(); - - // Allocates a crash key so that Seccomp information can be recorded. - void AllocateCrashKeys(); -diff -up chromium-90.0.4430.72/sandbox/linux/services/syscall_wrappers.cc.fstatfix chromium-90.0.4430.72/sandbox/linux/services/syscall_wrappers.cc ---- chromium-90.0.4430.72/sandbox/linux/services/syscall_wrappers.cc.fstatfix 2021-04-14 14:41:08.000000000 -0400 -+++ chromium-90.0.4430.72/sandbox/linux/services/syscall_wrappers.cc 2021-04-16 15:35:53.869542483 -0400 -@@ -261,4 +261,13 @@ int sys_sigaction(int signum, - - #endif // defined(MEMORY_SANITIZER) - -+SANDBOX_EXPORT int sys_fstat64(int fd, struct stat64 *buf) -+{ -+#if defined(__NR_fstat64) -+ return syscall(__NR_fstat64, fd, buf); -+#else -+ return syscall(__NR_fstat, fd, buf); -+#endif -+} -+ - } // namespace sandbox -diff -up chromium-90.0.4430.72/sandbox/linux/services/syscall_wrappers.h.fstatfix chromium-90.0.4430.72/sandbox/linux/services/syscall_wrappers.h ---- chromium-90.0.4430.72/sandbox/linux/services/syscall_wrappers.h.fstatfix 2021-04-14 14:41:08.000000000 -0400 -+++ chromium-90.0.4430.72/sandbox/linux/services/syscall_wrappers.h 2021-04-16 15:35:53.870542491 -0400 -@@ -17,6 +17,7 @@ struct sock_fprog; - struct rlimit64; - struct cap_hdr; - struct cap_data; -+struct stat64; - - namespace sandbox { - -@@ -84,6 +85,9 @@ SANDBOX_EXPORT int sys_sigaction(int sig - const struct sigaction* act, - struct sigaction* oldact); - -+// Recent glibc rewrites fstat to fstatat. -+SANDBOX_EXPORT int sys_fstat64(int fd, struct stat64 *buf); -+ - } // namespace sandbox - - #endif // SANDBOX_LINUX_SERVICES_SYSCALL_WRAPPERS_H_ diff --git a/chromium-92.0.4515.107-update-highway-0.12.2.patch b/chromium-92.0.4515.107-update-highway-0.12.2.patch deleted file mode 100644 index fca308b..0000000 --- a/chromium-92.0.4515.107-update-highway-0.12.2.patch +++ /dev/null @@ -1,4199 +0,0 @@ -diff -up chromium-92.0.4515.107/third_party/highway/src/CMakeLists.txt.in.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/CMakeLists.txt.in -diff -up chromium-92.0.4515.107/third_party/highway/src/CMakeLists.txt.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/CMakeLists.txt ---- chromium-92.0.4515.107/third_party/highway/src/CMakeLists.txt.update-highway-0.12.2 2021-07-19 14:47:23.000000000 -0400 -+++ chromium-92.0.4515.107/third_party/highway/src/CMakeLists.txt 2021-07-26 17:13:36.158002603 -0400 -@@ -19,7 +19,7 @@ if(POLICY CMP0083) - cmake_policy(SET CMP0083 NEW) - endif() - --project(hwy VERSION 0.1) -+project(hwy VERSION 0.12.2) # Keep in sync with highway.h version - - set(CMAKE_CXX_STANDARD 11) - set(CMAKE_CXX_EXTENSIONS OFF) -@@ -40,6 +40,8 @@ if (NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE RelWithDebInfo) - endif() - -+set(HWY_CMAKE_ARM7 OFF CACHE BOOL "Set copts for ARMv7 with NEON?") -+ - include(CheckCXXSourceCompiles) - check_cxx_source_compiles( - "int main() { -@@ -51,10 +53,13 @@ check_cxx_source_compiles( - HWY_EMSCRIPTEN - ) - -+set(HWY_CONTRIB_SOURCES -+ hwy/contrib/image/image.cc -+ hwy/contrib/image/image.h -+ hwy/contrib/math/math-inl.h -+) -+ - set(HWY_SOURCES -- contrib/image/image.cc -- contrib/image/image.h -- contrib/math/math-inl.h - hwy/aligned_allocator.cc - hwy/aligned_allocator.h - hwy/base.h -@@ -64,6 +69,7 @@ set(HWY_SOURCES - hwy/nanobenchmark.cc - hwy/nanobenchmark.h - hwy/ops/arm_neon-inl.h -+ hwy/ops/arm_sve-inl.h - hwy/ops/scalar-inl.h - hwy/ops/set_macros-inl.h - hwy/ops/shared-inl.h -@@ -146,13 +152,28 @@ else() - -fno-exceptions - ) - endif() --endif() -+ -+ if (HWY_CMAKE_ARM7) -+ list(APPEND HWY_FLAGS -+ -march=armv7-a -+ -mfpu=neon-vfpv4 -+ -mfloat-abi=hard # must match the toolchain specified as CXX= -+ -mfp16-format=ieee # required for vcvt_f32_f16 -+ ) -+ endif() # HWY_CMAKE_ARM7 -+ -+endif() # !MSVC - - add_library(hwy STATIC ${HWY_SOURCES}) - target_compile_options(hwy PRIVATE ${HWY_FLAGS}) - set_property(TARGET hwy PROPERTY POSITION_INDEPENDENT_CODE ON) - target_include_directories(hwy PUBLIC ${CMAKE_CURRENT_LIST_DIR}) - -+add_library(hwy_contrib STATIC ${HWY_CONTRIB_SOURCES}) -+target_compile_options(hwy_contrib PRIVATE ${HWY_FLAGS}) -+set_property(TARGET hwy_contrib PROPERTY POSITION_INDEPENDENT_CODE ON) -+target_include_directories(hwy_contrib PUBLIC ${CMAKE_CURRENT_LIST_DIR}) -+ - # -------------------------------------------------------- install library - install(TARGETS hwy - DESTINATION "${CMAKE_INSTALL_LIBDIR}") -@@ -166,9 +187,21 @@ foreach (source ${HWY_SOURCES}) - endif() - endforeach() - --# Add a pkg-config file for libhwy and the test library. -+install(TARGETS hwy_contrib -+ DESTINATION "${CMAKE_INSTALL_LIBDIR}") -+# Install all the headers keeping the relative path to the current directory -+# when installing them. -+foreach (source ${HWY_CONTRIB_SOURCES}) -+ if ("${source}" MATCHES "\.h$") -+ get_filename_component(dirname "${source}" DIRECTORY) -+ install(FILES "${source}" -+ DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/${dirname}") -+ endif() -+endforeach() -+ -+# Add a pkg-config file for libhwy and the contrib/test libraries. - set(HWY_LIBRARY_VERSION "${CMAKE_PROJECT_VERSION}") --foreach (pc libhwy.pc libhwy-test.pc) -+foreach (pc libhwy.pc libhwy-contrib.pc libhwy-test.pc) - configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${pc}.in" "${pc}" @ONLY) - install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${pc}" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig") -@@ -251,8 +284,8 @@ endif() - endif() # HWY_SYSTEM_GTEST - - set(HWY_TEST_FILES -- contrib/image/image_test.cc -- # contrib/math/math_test.cc -+ hwy/contrib/image/image_test.cc -+ # hwy/contrib/math/math_test.cc - hwy/aligned_allocator_test.cc - hwy/base_test.cc - hwy/highway_test.cc -@@ -274,11 +307,16 @@ foreach (TESTFILE IN LISTS HWY_TEST_FILE - get_filename_component(TESTNAME ${TESTFILE} NAME_WE) - add_executable(${TESTNAME} ${TESTFILE}) - target_compile_options(${TESTNAME} PRIVATE ${HWY_FLAGS}) -+ # Test all targets, not just the best/baseline. This changes the default -+ # policy to all-attainable; note that setting -DHWY_COMPILE_* directly can -+ # cause compile errors because only one may be set, and other CMakeLists.txt -+ # that include us may set them. -+ target_compile_options(${TESTNAME} PRIVATE -DHWY_IS_TEST=1) - - if(HWY_SYSTEM_GTEST) -- target_link_libraries(${TESTNAME} hwy GTest::GTest GTest::Main) -+ target_link_libraries(${TESTNAME} hwy hwy_contrib GTest::GTest GTest::Main) - else() -- target_link_libraries(${TESTNAME} hwy gtest gtest_main) -+ target_link_libraries(${TESTNAME} hwy hwy_contrib gtest gtest_main) - endif() - # Output test targets in the test directory. - set_target_properties(${TESTNAME} PROPERTIES PREFIX "tests/") -diff -up chromium-92.0.4515.107/third_party/highway/src/debian/changelog.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/debian/changelog -diff -up chromium-92.0.4515.107/third_party/highway/src/hwy/aligned_allocator.h.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/hwy/aligned_allocator.h ---- chromium-92.0.4515.107/third_party/highway/src/hwy/aligned_allocator.h.update-highway-0.12.2 2021-07-19 14:47:23.000000000 -0400 -+++ chromium-92.0.4515.107/third_party/highway/src/hwy/aligned_allocator.h 2021-07-26 17:15:37.281847484 -0400 -@@ -111,6 +111,32 @@ AlignedUniquePtr MakeUniqueAligned(Ar - new (ptr) T(std::forward(args)...), AlignedDeleter()); - } - -+// Helpers for array allocators (avoids overflow) -+namespace detail { -+ -+// Returns x such that 1u << x == n (if n is a power of two). -+static inline constexpr size_t ShiftCount(size_t n) { -+ return (n <= 1) ? 0 : 1 + ShiftCount(n / 2); -+} -+ -+template -+T* AllocateAlignedItems(size_t items, AllocPtr alloc_ptr, void* opaque_ptr) { -+ constexpr size_t size = sizeof(T); -+ -+ constexpr bool is_pow2 = (size & (size - 1)) == 0; -+ constexpr size_t bits = ShiftCount(size); -+ static_assert(!is_pow2 || (1ull << bits) == size, "ShiftCount is incorrect"); -+ -+ const size_t bytes = is_pow2 ? items << bits : items * size; -+ const size_t check = is_pow2 ? bytes >> bits : bytes / size; -+ if (check != items) { -+ return nullptr; // overflowed -+ } -+ return static_cast(AllocateAlignedBytes(bytes, alloc_ptr, opaque_ptr)); -+} -+ -+} // namespace detail -+ - // Aligned memory equivalent of make_unique for array types using the - // custom allocators alloc/free. This function calls the constructor with the - // passed Args... on every created item. The destructor of each element will be -@@ -118,10 +144,11 @@ AlignedUniquePtr MakeUniqueAligned(Ar - template - AlignedUniquePtr MakeUniqueAlignedArrayWithAlloc( - size_t items, AllocPtr alloc, FreePtr free, void* opaque, Args&&... args) { -- T* ptr = -- static_cast(AllocateAlignedBytes(items * sizeof(T), alloc, opaque)); -- for (size_t i = 0; i < items; i++) { -- new (ptr + i) T(std::forward(args)...); -+ T* ptr = detail::AllocateAlignedItems(items, alloc, opaque); -+ if (ptr != nullptr) { -+ for (size_t i = 0; i < items; i++) { -+ new (ptr + i) T(std::forward(args)...); -+ } - } - return AlignedUniquePtr(ptr, AlignedDeleter(free, opaque)); - } -@@ -165,7 +192,7 @@ template - AlignedFreeUniquePtr AllocateAligned(const size_t items, AllocPtr alloc, - FreePtr free, void* opaque) { - return AlignedFreeUniquePtr( -- static_cast(AllocateAlignedBytes(items * sizeof(T), alloc, opaque)), -+ detail::AllocateAlignedItems(items, alloc, opaque), - AlignedFreer(free, opaque)); - } - -diff -up chromium-92.0.4515.107/third_party/highway/src/hwy/aligned_allocator_test.cc.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/hwy/aligned_allocator_test.cc ---- chromium-92.0.4515.107/third_party/highway/src/hwy/aligned_allocator_test.cc.update-highway-0.12.2 2021-07-19 14:47:23.000000000 -0400 -+++ chromium-92.0.4515.107/third_party/highway/src/hwy/aligned_allocator_test.cc 2021-07-26 17:16:43.672858709 -0400 -@@ -16,6 +16,7 @@ - - #include - -+#include - #include - #include - #include -@@ -87,6 +88,32 @@ TEST(AlignedAllocatorTest, FreeNullptr) - /*opaque_ptr=*/nullptr); - } - -+TEST(AlignedAllocatorTest, Log2) { -+ EXPECT_EQ(0u, detail::ShiftCount(1)); -+ EXPECT_EQ(1u, detail::ShiftCount(2)); -+ EXPECT_EQ(3u, detail::ShiftCount(8)); -+} -+ -+// Allocator returns null when it detects overflow of items * sizeof(T). -+TEST(AlignedAllocatorTest, Overflow) { -+ constexpr size_t max = ~size_t(0); -+ constexpr size_t msb = (max >> 1) + 1; -+ using Size5 = std::array; -+ using Size10 = std::array; -+ EXPECT_EQ(nullptr, -+ detail::AllocateAlignedItems(max / 2, nullptr, nullptr)); -+ EXPECT_EQ(nullptr, -+ detail::AllocateAlignedItems(max / 3, nullptr, nullptr)); -+ EXPECT_EQ(nullptr, -+ detail::AllocateAlignedItems(max / 4, nullptr, nullptr)); -+ EXPECT_EQ(nullptr, -+ detail::AllocateAlignedItems(msb, nullptr, nullptr)); -+ EXPECT_EQ(nullptr, -+ detail::AllocateAlignedItems(msb + 1, nullptr, nullptr)); -+ EXPECT_EQ(nullptr, -+ detail::AllocateAlignedItems(msb / 4, nullptr, nullptr)); -+} -+ - TEST(AlignedAllocatorTest, AllocDefaultPointers) { - const size_t kSize = 7777; - void* ptr = AllocateAlignedBytes(kSize, /*alloc_ptr=*/nullptr, -@@ -215,7 +242,8 @@ TEST(AlignedAllocatorTest, MakeUniqueAli - auto arr = MakeUniqueAlignedArrayWithAlloc>( - 7, FakeAllocator::StaticAlloc, FakeAllocator::StaticFree, &fake_alloc, - &counter); -- // An array shold still only call a single allocation. -+ ASSERT_NE(nullptr, arr.get()); -+ // An array should still only call a single allocation. - EXPECT_EQ(1u, fake_alloc.PendingAllocs()); - EXPECT_EQ(7, counter); - for (size_t i = 0; i < 7; i++) { -diff -up chromium-92.0.4515.107/third_party/highway/src/hwy/base.h.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/hwy/base.h ---- chromium-92.0.4515.107/third_party/highway/src/hwy/base.h.update-highway-0.12.2 2021-07-19 14:47:23.000000000 -0400 -+++ chromium-92.0.4515.107/third_party/highway/src/hwy/base.h 2021-07-26 17:16:04.753265910 -0400 -@@ -203,6 +203,10 @@ - #define HWY_ARCH_X86_64 0 - #endif - -+#if HWY_ARCH_X86_32 && HWY_ARCH_X86_64 -+#error "Cannot have both x86-32 and x86-64" -+#endif -+ - #if HWY_ARCH_X86_32 || HWY_ARCH_X86_64 - #define HWY_ARCH_X86 1 - #else -@@ -249,9 +253,11 @@ - #define HWY_ARCH_RVV 0 - #endif - -+// It is an error to detect multiple architectures at the same time, but OK to -+// detect none of the above. - #if (HWY_ARCH_X86 + HWY_ARCH_PPC + HWY_ARCH_ARM + HWY_ARCH_WASM + \ -- HWY_ARCH_RVV) != 1 --#error "Must detect exactly one platform" -+ HWY_ARCH_RVV) > 1 -+#error "Must not detect more than one architecture" - #endif - - //------------------------------------------------------------------------------ -@@ -328,6 +334,12 @@ static constexpr HWY_MAYBE_UNUSED size_t - - // RVV already has a builtin type and the GCC intrinsics require it. - #if HWY_ARCH_RVV && HWY_COMPILER_GCC -+#define HWY_NATIVE_FLOAT16 1 -+#else -+#define HWY_NATIVE_FLOAT16 0 -+#endif -+ -+#if HWY_NATIVE_FLOAT16 - using float16_t = __fp16; - // Clang does not allow __fp16 arguments, but scalar.h requires LaneType - // arguments, so use a wrapper. -@@ -597,7 +609,7 @@ HWY_API size_t PopCount(uint64_t x) { - return static_cast(__builtin_popcountll(x)); - #elif HWY_COMPILER_MSVC && HWY_ARCH_X86_64 - return _mm_popcnt_u64(x); --#elif HWY_COMPILER_MSVC -+#elif HWY_COMPILER_MSVC && HWY_ARCH_X86_32 - return _mm_popcnt_u32(uint32_t(x)) + _mm_popcnt_u32(uint32_t(x >> 32)); - #else - x -= ((x >> 1) & 0x55555555U); -diff -up chromium-92.0.4515.107/third_party/highway/src/hwy/cache_control.h.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/hwy/cache_control.h ---- chromium-92.0.4515.107/third_party/highway/src/hwy/cache_control.h.update-highway-0.12.2 2021-07-19 14:47:23.000000000 -0400 -+++ chromium-92.0.4515.107/third_party/highway/src/hwy/cache_control.h 2021-07-26 17:16:26.004589594 -0400 -@@ -32,6 +32,14 @@ - #include // SSE2 - #endif - -+// Windows.h #defines these, which causes infinite recursion. Temporarily -+// undefine them in this header; these functions are anyway deprecated. -+// TODO(janwas): remove when these functions are removed. -+#pragma push_macro("LoadFence") -+#pragma push_macro("StoreFence") -+#undef LoadFence -+#undef StoreFence -+ - namespace hwy { - - // Even if N*sizeof(T) is smaller, Stream may write a multiple of this size. -@@ -83,6 +91,17 @@ HWY_INLINE HWY_ATTR_CACHE void FlushCach - #endif - } - -+// Reduces power consumption in spin-loops. No effect on non-x86. -+HWY_INLINE HWY_ATTR_CACHE void Pause() { -+#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL) -+ _mm_pause(); -+#endif -+} -+ - } // namespace hwy - -+// TODO(janwas): remove when these functions are removed. (See above.) -+#pragma pop_macro("StoreFence") -+#pragma pop_macro("LoadFence") -+ - #endif // HIGHWAY_HWY_CACHE_CONTROL_H_ -diff -up chromium-92.0.4515.107/third_party/highway/src/hwy/examples/skeleton.cc.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/hwy/examples/skeleton.cc -diff -up chromium-92.0.4515.107/third_party/highway/src/hwy/examples/skeleton_test.cc.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/hwy/examples/skeleton_test.cc -diff -up chromium-92.0.4515.107/third_party/highway/src/hwy/highway.h.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/hwy/highway.h ---- chromium-92.0.4515.107/third_party/highway/src/hwy/highway.h.update-highway-0.12.2 2021-07-19 14:47:23.000000000 -0400 -+++ chromium-92.0.4515.107/third_party/highway/src/hwy/highway.h 2021-07-26 17:16:58.109078590 -0400 -@@ -25,10 +25,10 @@ - - namespace hwy { - --// API version (https://semver.org/) -+// API version (https://semver.org/); keep in sync with CMakeLists.txt. - #define HWY_MAJOR 0 - #define HWY_MINOR 12 --#define HWY_PATCH 0 -+#define HWY_PATCH 2 - - //------------------------------------------------------------------------------ - // Shorthand for descriptors (defined in shared-inl.h) used to select overloads. -@@ -49,7 +49,7 @@ namespace hwy { - HWY_FULL_RECOMPOSER((__VA_ARGS__, HWY_FULL2, HWY_FULL1, )) - #define HWY_FULL(...) HWY_CHOOSE_FULL(__VA_ARGS__())(__VA_ARGS__) - --// Vector of up to MAX_N lanes. -+// Vector of up to MAX_N lanes. Discouraged, when possible, use Half<> instead. - #define HWY_CAPPED(T, MAX_N) \ - hwy::HWY_NAMESPACE::Simd - -@@ -75,6 +75,10 @@ namespace hwy { - #define HWY_STATIC_DISPATCH(FUNC_NAME) N_WASM::FUNC_NAME - #elif HWY_STATIC_TARGET == HWY_NEON - #define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON::FUNC_NAME -+#elif HWY_STATIC_TARGET == HWY_SVE -+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE::FUNC_NAME -+#elif HWY_STATIC_TARGET == HWY_SVE2 -+#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE2::FUNC_NAME - #elif HWY_STATIC_TARGET == HWY_PPC8 - #define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC8::FUNC_NAME - #elif HWY_STATIC_TARGET == HWY_SSE4 -@@ -143,6 +147,18 @@ FunctionCache Function - #define HWY_CHOOSE_NEON(FUNC_NAME) nullptr - #endif - -+#if HWY_TARGETS & HWY_SVE -+#define HWY_CHOOSE_SVE(FUNC_NAME) &N_SVE::FUNC_NAME -+#else -+#define HWY_CHOOSE_SVE(FUNC_NAME) nullptr -+#endif -+ -+#if HWY_TARGETS & HWY_SVE2 -+#define HWY_CHOOSE_SVE2(FUNC_NAME) &N_SVE2::FUNC_NAME -+#else -+#define HWY_CHOOSE_SVE2(FUNC_NAME) nullptr -+#endif -+ - #if HWY_TARGETS & HWY_PPC8 - #define HWY_CHOOSE_PCC8(FUNC_NAME) &N_PPC8::FUNC_NAME - #else -@@ -261,8 +277,11 @@ FunctionCache Function - #elif HWY_TARGET == HWY_AVX3 - #include "hwy/ops/x86_512-inl.h" - #elif HWY_TARGET == HWY_PPC8 -+#error "PPC is not yet supported" - #elif HWY_TARGET == HWY_NEON - #include "hwy/ops/arm_neon-inl.h" -+#elif HWY_TARGET == HWY_SVE || HWY_TARGET == HWY_SVE2 -+#include "hwy/ops/arm_sve-inl.h" - #elif HWY_TARGET == HWY_WASM - #include "hwy/ops/wasm_128-inl.h" - #elif HWY_TARGET == HWY_RVV -diff -up chromium-92.0.4515.107/third_party/highway/src/hwy/nanobenchmark.cc.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/hwy/nanobenchmark.cc ---- chromium-92.0.4515.107/third_party/highway/src/hwy/nanobenchmark.cc.update-highway-0.12.2 2021-07-19 14:47:23.000000000 -0400 -+++ chromium-92.0.4515.107/third_party/highway/src/hwy/nanobenchmark.cc 2021-07-26 17:17:12.094291603 -0400 -@@ -29,6 +29,22 @@ - #include - #include - -+#if defined(_WIN32) || defined(_WIN64) -+#ifndef NOMINMAX -+#define NOMINMAX -+#endif // NOMINMAX -+#include -+#endif -+ -+#if defined(__MACH__) -+#include -+#include -+#endif -+ -+#if defined(__HAIKU__) -+#include -+#endif -+ - #include "hwy/base.h" - #if HWY_ARCH_PPC - #include // NOLINT __ppc_get_timebase_freq -@@ -43,114 +59,13 @@ - #endif // HWY_ARCH_X86 - - namespace hwy { --namespace platform { - namespace { -- --#if HWY_ARCH_X86 -- --void Cpuid(const uint32_t level, const uint32_t count, -- uint32_t* HWY_RESTRICT abcd) { --#if HWY_COMPILER_MSVC -- int regs[4]; -- __cpuidex(regs, level, count); -- for (int i = 0; i < 4; ++i) { -- abcd[i] = regs[i]; -- } --#else -- uint32_t a; -- uint32_t b; -- uint32_t c; -- uint32_t d; -- __cpuid_count(level, count, a, b, c, d); -- abcd[0] = a; -- abcd[1] = b; -- abcd[2] = c; -- abcd[3] = d; --#endif --} -- --std::string BrandString() { -- char brand_string[49]; -- std::array abcd; -- -- // Check if brand string is supported (it is on all reasonable Intel/AMD) -- Cpuid(0x80000000U, 0, abcd.data()); -- if (abcd[0] < 0x80000004U) { -- return std::string(); -- } -- -- for (size_t i = 0; i < 3; ++i) { -- Cpuid(static_cast(0x80000002U + i), 0, abcd.data()); -- memcpy(brand_string + i * 16, abcd.data(), sizeof(abcd)); -- } -- brand_string[48] = 0; -- return brand_string; --} -- --// Returns the frequency quoted inside the brand string. This does not --// account for throttling nor Turbo Boost. --double NominalClockRate() { -- const std::string& brand_string = BrandString(); -- // Brand strings include the maximum configured frequency. These prefixes are -- // defined by Intel CPUID documentation. -- const char* prefixes[3] = {"MHz", "GHz", "THz"}; -- const double multipliers[3] = {1E6, 1E9, 1E12}; -- for (size_t i = 0; i < 3; ++i) { -- const size_t pos_prefix = brand_string.find(prefixes[i]); -- if (pos_prefix != std::string::npos) { -- const size_t pos_space = brand_string.rfind(' ', pos_prefix - 1); -- if (pos_space != std::string::npos) { -- const std::string digits = -- brand_string.substr(pos_space + 1, pos_prefix - pos_space - 1); -- return std::stod(digits) * multipliers[i]; -- } -- } -- } -- -- return 0.0; --} -- --#endif // HWY_ARCH_X86 -- --} // namespace -- --// Returns tick rate. Invariant means the tick counter frequency is independent --// of CPU throttling or sleep. May be expensive, caller should cache the result. --double InvariantTicksPerSecond() { --#if HWY_ARCH_PPC -- return __ppc_get_timebase_freq(); --#elif HWY_ARCH_X86 -- // We assume the TSC is invariant; it is on all recent Intel/AMD CPUs. -- return NominalClockRate(); --#else -- // Fall back to clock_gettime nanoseconds. -- return 1E9; --#endif --} -- --} // namespace platform --namespace { -- --// Prevents the compiler from eliding the computations that led to "output". --template --inline void PreventElision(T&& output) { --#if HWY_COMPILER_MSVC == 0 -- // Works by indicating to the compiler that "output" is being read and -- // modified. The +r constraint avoids unnecessary writes to memory, but only -- // works for built-in types (typically FuncOutput). -- asm volatile("" : "+r"(output) : : "memory"); --#else -- // MSVC does not support inline assembly anymore (and never supported GCC's -- // RTL constraints). Self-assignment with #pragma optimize("off") might be -- // expected to prevent elision, but it does not with MSVC 2015. Type-punning -- // with volatile pointers generates inefficient code on MSVC 2017. -- static std::atomic dummy(T{}); -- dummy.store(output, std::memory_order_relaxed); --#endif --} -- - namespace timer { - -+// Ticks := platform-specific timer values (CPU cycles on x86). Must be -+// unsigned to guarantee wraparound on overflow. -+using Ticks = uint64_t; -+ - // Start/Stop return absolute timestamps and must be placed immediately before - // and after the region to measure. We provide separate Start/Stop functions - // because they use different fences. -@@ -202,8 +117,8 @@ namespace timer { - - // Returns a 64-bit timestamp in unit of 'ticks'; to convert to seconds, - // divide by InvariantTicksPerSecond. --inline uint64_t Start64() { -- uint64_t t; -+inline Ticks Start() { -+ Ticks t; - #if HWY_ARCH_PPC - asm volatile("mfspr %0, %1" : "=r"(t) : "i"(268)); - #elif HWY_ARCH_X86 && HWY_COMPILER_MSVC -@@ -228,8 +143,15 @@ inline uint64_t Start64() { - : "rdx", "memory", "cc"); - #elif HWY_ARCH_RVV - asm volatile("rdcycle %0" : "=r"(t)); --#else -- // Fall back to OS - unsure how to reliably query cntvct_el0 frequency. -+#elif defined(_WIN32) || defined(_WIN64) -+ LARGE_INTEGER counter; -+ (void)QueryPerformanceCounter(&counter); -+ t = counter.QuadPart; -+#elif defined(__MACH__) -+ t = mach_absolute_time(); -+#elif defined(__HAIKU__) -+ t = system_time_nsecs(); // since boot -+#else // POSIX - timespec ts; - clock_gettime(CLOCK_MONOTONIC, &ts); - t = ts.tv_sec * 1000000000LL + ts.tv_nsec; -@@ -237,7 +159,7 @@ inline uint64_t Start64() { - return t; - } - --inline uint64_t Stop64() { -+inline Ticks Stop() { - uint64_t t; - #if HWY_ARCH_PPC - asm volatile("mfspr %0, %1" : "=r"(t) : "i"(268)); -@@ -261,61 +183,7 @@ inline uint64_t Stop64() { - // "cc" = flags modified by SHL. - : "rcx", "rdx", "memory", "cc"); - #else -- t = Start64(); --#endif -- return t; --} -- --// Returns a 32-bit timestamp with about 4 cycles less overhead than --// Start64. Only suitable for measuring very short regions because the --// timestamp overflows about once a second. --inline uint32_t Start32() { -- uint32_t t; --#if HWY_ARCH_X86 && HWY_COMPILER_MSVC -- _ReadWriteBarrier(); -- _mm_lfence(); -- _ReadWriteBarrier(); -- t = static_cast(__rdtsc()); -- _ReadWriteBarrier(); -- _mm_lfence(); -- _ReadWriteBarrier(); --#elif HWY_ARCH_X86_64 -- asm volatile( -- "lfence\n\t" -- "rdtsc\n\t" -- "lfence" -- : "=a"(t) -- : -- // "memory" avoids reordering. rdx = TSC >> 32. -- : "rdx", "memory"); --#elif HWY_ARCH_RVV -- asm volatile("rdcycle %0" : "=r"(t)); --#else -- t = static_cast(Start64()); --#endif -- return t; --} -- --inline uint32_t Stop32() { -- uint32_t t; --#if HWY_ARCH_X86 && HWY_COMPILER_MSVC -- _ReadWriteBarrier(); -- unsigned aux; -- t = static_cast(__rdtscp(&aux)); -- _ReadWriteBarrier(); -- _mm_lfence(); -- _ReadWriteBarrier(); --#elif HWY_ARCH_X86_64 -- // Use inline asm because __rdtscp generates code to store TSC_AUX (ecx). -- asm volatile( -- "rdtscp\n\t" -- "lfence" -- : "=a"(t) -- : -- // "memory" avoids reordering. rcx = TSC_AUX. rdx = TSC >> 32. -- : "rcx", "rdx", "memory"); --#else -- t = static_cast(Stop64()); -+ t = Start(); - #endif - return t; - } -@@ -440,21 +308,130 @@ T MedianAbsoluteDeviation(const T* value - } - - } // namespace robust_statistics -+} // namespace -+namespace platform { -+namespace { - --// Ticks := platform-specific timer values (CPU cycles on x86). Must be --// unsigned to guarantee wraparound on overflow. 32 bit timers are faster to --// read than 64 bit. --using Ticks = uint32_t; -+// Prevents the compiler from eliding the computations that led to "output". -+template -+inline void PreventElision(T&& output) { -+#if HWY_COMPILER_MSVC == 0 -+ // Works by indicating to the compiler that "output" is being read and -+ // modified. The +r constraint avoids unnecessary writes to memory, but only -+ // works for built-in types (typically FuncOutput). -+ asm volatile("" : "+r"(output) : : "memory"); -+#else -+ // MSVC does not support inline assembly anymore (and never supported GCC's -+ // RTL constraints). Self-assignment with #pragma optimize("off") might be -+ // expected to prevent elision, but it does not with MSVC 2015. Type-punning -+ // with volatile pointers generates inefficient code on MSVC 2017. -+ static std::atomic dummy(T{}); -+ dummy.store(output, std::memory_order_relaxed); -+#endif -+} -+ -+#if HWY_ARCH_X86 -+ -+void Cpuid(const uint32_t level, const uint32_t count, -+ uint32_t* HWY_RESTRICT abcd) { -+#if HWY_COMPILER_MSVC -+ int regs[4]; -+ __cpuidex(regs, level, count); -+ for (int i = 0; i < 4; ++i) { -+ abcd[i] = regs[i]; -+ } -+#else -+ uint32_t a; -+ uint32_t b; -+ uint32_t c; -+ uint32_t d; -+ __cpuid_count(level, count, a, b, c, d); -+ abcd[0] = a; -+ abcd[1] = b; -+ abcd[2] = c; -+ abcd[3] = d; -+#endif -+} -+ -+std::string BrandString() { -+ char brand_string[49]; -+ std::array abcd; -+ -+ // Check if brand string is supported (it is on all reasonable Intel/AMD) -+ Cpuid(0x80000000U, 0, abcd.data()); -+ if (abcd[0] < 0x80000004U) { -+ return std::string(); -+ } -+ -+ for (size_t i = 0; i < 3; ++i) { -+ Cpuid(static_cast(0x80000002U + i), 0, abcd.data()); -+ memcpy(brand_string + i * 16, abcd.data(), sizeof(abcd)); -+ } -+ brand_string[48] = 0; -+ return brand_string; -+} -+ -+// Returns the frequency quoted inside the brand string. This does not -+// account for throttling nor Turbo Boost. -+double NominalClockRate() { -+ const std::string& brand_string = BrandString(); -+ // Brand strings include the maximum configured frequency. These prefixes are -+ // defined by Intel CPUID documentation. -+ const char* prefixes[3] = {"MHz", "GHz", "THz"}; -+ const double multipliers[3] = {1E6, 1E9, 1E12}; -+ for (size_t i = 0; i < 3; ++i) { -+ const size_t pos_prefix = brand_string.find(prefixes[i]); -+ if (pos_prefix != std::string::npos) { -+ const size_t pos_space = brand_string.rfind(' ', pos_prefix - 1); -+ if (pos_space != std::string::npos) { -+ const std::string digits = -+ brand_string.substr(pos_space + 1, pos_prefix - pos_space - 1); -+ return std::stod(digits) * multipliers[i]; -+ } -+ } -+ } -+ -+ return 0.0; -+} -+ -+#endif // HWY_ARCH_X86 -+ -+} // namespace -+ -+double InvariantTicksPerSecond() { -+#if HWY_ARCH_PPC -+ return __ppc_get_timebase_freq(); -+#elif HWY_ARCH_X86 -+ // We assume the TSC is invariant; it is on all recent Intel/AMD CPUs. -+ return NominalClockRate(); -+#elif defined(_WIN32) || defined(_WIN64) -+ LARGE_INTEGER freq; -+ (void)QueryPerformanceFrequency(&freq); -+ return double(freq.QuadPart); -+#elif defined(__MACH__) -+ // https://developer.apple.com/library/mac/qa/qa1398/_index.html -+ mach_timebase_info_data_t timebase; -+ (void)mach_timebase_info(&timebase); -+ return double(timebase.denom) / timebase.numer * 1E9; -+#else -+ // TODO(janwas): ARM? Unclear how to reliably query cntvct_el0 frequency. -+ return 1E9; // Haiku and clock_gettime return nanoseconds. -+#endif -+} - --// Returns timer overhead / minimum measurable difference. --Ticks TimerResolution() { -+double Now() { -+ static const double mul = 1.0 / InvariantTicksPerSecond(); -+ return static_cast(timer::Start()) * mul; -+} -+ -+uint64_t TimerResolution() { - // Nested loop avoids exceeding stack/L1 capacity. -- Ticks repetitions[Params::kTimerSamples]; -+ timer::Ticks repetitions[Params::kTimerSamples]; - for (size_t rep = 0; rep < Params::kTimerSamples; ++rep) { -- Ticks samples[Params::kTimerSamples]; -+ timer::Ticks samples[Params::kTimerSamples]; - for (size_t i = 0; i < Params::kTimerSamples; ++i) { -- const Ticks t0 = timer::Start32(); -- const Ticks t1 = timer::Stop32(); -+ const timer::Ticks t0 = timer::Start(); -+ const timer::Ticks t1 = timer::Stop(); - samples[i] = t1 - t0; - } - repetitions[rep] = robust_statistics::Mode(samples); -@@ -462,18 +439,21 @@ Ticks TimerResolution() { - return robust_statistics::Mode(repetitions); - } - --static const Ticks timer_resolution = TimerResolution(); -+} // namespace platform -+namespace { -+ -+static const timer::Ticks timer_resolution = platform::TimerResolution(); - - // Estimates the expected value of "lambda" values with a variable number of - // samples until the variability "rel_mad" is less than "max_rel_mad". - template --Ticks SampleUntilStable(const double max_rel_mad, double* rel_mad, -- const Params& p, const Lambda& lambda) { -+timer::Ticks SampleUntilStable(const double max_rel_mad, double* rel_mad, -+ const Params& p, const Lambda& lambda) { - // Choose initial samples_per_eval based on a single estimated duration. -- Ticks t0 = timer::Start32(); -+ timer::Ticks t0 = timer::Start(); - lambda(); -- Ticks t1 = timer::Stop32(); -- Ticks est = t1 - t0; -+ timer::Ticks t1 = timer::Stop(); -+ timer::Ticks est = t1 - t0; - static const double ticks_per_second = platform::InvariantTicksPerSecond(); - const size_t ticks_per_eval = - static_cast(ticks_per_second * p.seconds_per_eval); -@@ -481,21 +461,21 @@ Ticks SampleUntilStable(const double max - est == 0 ? p.min_samples_per_eval : ticks_per_eval / est; - samples_per_eval = std::max(samples_per_eval, p.min_samples_per_eval); - -- std::vector samples; -+ std::vector samples; - samples.reserve(1 + samples_per_eval); - samples.push_back(est); - - // Percentage is too strict for tiny differences, so also allow a small - // absolute "median absolute deviation". -- const Ticks max_abs_mad = (timer_resolution + 99) / 100; -+ const timer::Ticks max_abs_mad = (timer_resolution + 99) / 100; - *rel_mad = 0.0; // ensure initialized - - for (size_t eval = 0; eval < p.max_evals; ++eval, samples_per_eval *= 2) { - samples.reserve(samples.size() + samples_per_eval); - for (size_t i = 0; i < samples_per_eval; ++i) { -- t0 = timer::Start32(); -+ t0 = timer::Start(); - lambda(); -- t1 = timer::Stop32(); -+ t1 = timer::Stop(); - samples.push_back(t1 - t0); - } - -@@ -508,14 +488,14 @@ Ticks SampleUntilStable(const double max - NANOBENCHMARK_CHECK(est != 0); - - // Median absolute deviation (mad) is a robust measure of 'variability'. -- const Ticks abs_mad = robust_statistics::MedianAbsoluteDeviation( -+ const timer::Ticks abs_mad = robust_statistics::MedianAbsoluteDeviation( - samples.data(), samples.size(), est); -- *rel_mad = static_cast(int(abs_mad)) / est; -+ *rel_mad = static_cast(abs_mad) / static_cast(est); - - if (*rel_mad <= max_rel_mad || abs_mad <= max_abs_mad) { - if (p.verbose) { -- printf("%6zu samples => %5u (abs_mad=%4u, rel_mad=%4.2f%%)\n", -- samples.size(), est, abs_mad, *rel_mad * 100.0); -+ printf("%6zu samples => %5zu (abs_mad=%4zu, rel_mad=%4.2f%%)\n", -+ samples.size(), size_t(est), size_t(abs_mad), *rel_mad * 100.0); - } - return est; - } -@@ -539,29 +519,17 @@ InputVec UniqueInputs(const FuncInput* i - return unique; - } - --// Returns how often we need to call func for sufficient precision, or zero --// on failure (e.g. the elapsed time is too long for a 32-bit tick count). -+// Returns how often we need to call func for sufficient precision. - size_t NumSkip(const Func func, const uint8_t* arg, const InputVec& unique, - const Params& p) { - // Min elapsed ticks for any input. -- Ticks min_duration = ~0u; -+ timer::Ticks min_duration = ~timer::Ticks(0); - - for (const FuncInput input : unique) { -- // Make sure a 32-bit timer is sufficient. -- const uint64_t t0 = timer::Start64(); -- PreventElision(func(arg, input)); -- const uint64_t t1 = timer::Stop64(); -- const uint64_t elapsed = t1 - t0; -- if (elapsed >= (1ULL << 30)) { -- fprintf(stderr, "Measurement failed: need 64-bit timer for input=%zu\n", -- input); -- return 0; -- } -- - double rel_mad; -- const Ticks total = SampleUntilStable( -+ const timer::Ticks total = SampleUntilStable( - p.target_rel_mad, &rel_mad, p, -- [func, arg, input]() { PreventElision(func(arg, input)); }); -+ [func, arg, input]() { platform::PreventElision(func(arg, input)); }); - min_duration = std::min(min_duration, total - timer_resolution); - } - -@@ -571,8 +539,8 @@ size_t NumSkip(const Func func, const ui - const size_t num_skip = - min_duration == 0 ? 0 : (max_skip + min_duration - 1) / min_duration; - if (p.verbose) { -- printf("res=%u max_skip=%zu min_dur=%u num_skip=%zu\n", timer_resolution, -- max_skip, min_duration, num_skip); -+ printf("res=%zu max_skip=%zu min_dur=%zu num_skip=%zu\n", -+ size_t(timer_resolution), max_skip, size_t(min_duration), num_skip); - } - return num_skip; - } -@@ -637,13 +605,14 @@ void FillSubset(const InputVec& full, co - } - - // Returns total ticks elapsed for all inputs. --Ticks TotalDuration(const Func func, const uint8_t* arg, const InputVec* inputs, -- const Params& p, double* max_rel_mad) { -+timer::Ticks TotalDuration(const Func func, const uint8_t* arg, -+ const InputVec* inputs, const Params& p, -+ double* max_rel_mad) { - double rel_mad; -- const Ticks duration = -+ const timer::Ticks duration = - SampleUntilStable(p.target_rel_mad, &rel_mad, p, [func, arg, inputs]() { - for (const FuncInput input : *inputs) { -- PreventElision(func(arg, input)); -+ platform::PreventElision(func(arg, input)); - } - }); - *max_rel_mad = std::max(*max_rel_mad, rel_mad); -@@ -657,19 +626,20 @@ HWY_NOINLINE FuncOutput EmptyFunc(const - - // Returns overhead of accessing inputs[] and calling a function; this will - // be deducted from future TotalDuration return values. --Ticks Overhead(const uint8_t* arg, const InputVec* inputs, const Params& p) { -+timer::Ticks Overhead(const uint8_t* arg, const InputVec* inputs, -+ const Params& p) { - double rel_mad; - // Zero tolerance because repeatability is crucial and EmptyFunc is fast. - return SampleUntilStable(0.0, &rel_mad, p, [arg, inputs]() { - for (const FuncInput input : *inputs) { -- PreventElision(EmptyFunc(arg, input)); -+ platform::PreventElision(EmptyFunc(arg, input)); - } - }); - } - - } // namespace - --int Unpredictable1() { return timer::Start64() != ~0ULL; } -+int Unpredictable1() { return timer::Start() != ~0ULL; } - - size_t Measure(const Func func, const uint8_t* arg, const FuncInput* inputs, - const size_t num_inputs, Result* results, const Params& p) { -@@ -685,32 +655,35 @@ size_t Measure(const Func func, const ui - ReplicateInputs(inputs, num_inputs, unique.size(), num_skip, p); - InputVec subset(full.size() - num_skip); - -- const Ticks overhead = Overhead(arg, &full, p); -- const Ticks overhead_skip = Overhead(arg, &subset, p); -+ const timer::Ticks overhead = Overhead(arg, &full, p); -+ const timer::Ticks overhead_skip = Overhead(arg, &subset, p); - if (overhead < overhead_skip) { -- fprintf(stderr, "Measurement failed: overhead %u < %u\n", overhead, -- overhead_skip); -+ fprintf(stderr, "Measurement failed: overhead %zu < %zu\n", -+ size_t(overhead), size_t(overhead_skip)); - return 0; - } - - if (p.verbose) { -- printf("#inputs=%5zu,%5zu overhead=%5u,%5u\n", full.size(), subset.size(), -- overhead, overhead_skip); -+ printf("#inputs=%5zu,%5zu overhead=%5zu,%5zu\n", full.size(), subset.size(), -+ size_t(overhead), size_t(overhead_skip)); - } - - double max_rel_mad = 0.0; -- const Ticks total = TotalDuration(func, arg, &full, p, &max_rel_mad); -+ const timer::Ticks total = TotalDuration(func, arg, &full, p, &max_rel_mad); - - for (size_t i = 0; i < unique.size(); ++i) { - FillSubset(full, unique[i], num_skip, &subset); -- const Ticks total_skip = TotalDuration(func, arg, &subset, p, &max_rel_mad); -+ const timer::Ticks total_skip = -+ TotalDuration(func, arg, &subset, p, &max_rel_mad); - - if (total < total_skip) { -- fprintf(stderr, "Measurement failed: total %u < %u\n", total, total_skip); -+ fprintf(stderr, "Measurement failed: total %zu < %zu\n", size_t(total), -+ size_t(total_skip)); - return 0; - } - -- const Ticks duration = (total - overhead) - (total_skip - overhead_skip); -+ const timer::Ticks duration = -+ (total - overhead) - (total_skip - overhead_skip); - results[i].input = unique[i]; - results[i].ticks = static_cast(duration) * mul; - results[i].variability = static_cast(max_rel_mad); -diff -up chromium-92.0.4515.107/third_party/highway/src/hwy/nanobenchmark.h.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/hwy/nanobenchmark.h ---- chromium-92.0.4515.107/third_party/highway/src/hwy/nanobenchmark.h.update-highway-0.12.2 2021-07-19 14:47:23.000000000 -0400 -+++ chromium-92.0.4515.107/third_party/highway/src/hwy/nanobenchmark.h 2021-07-26 17:17:12.094291603 -0400 -@@ -44,11 +44,6 @@ - // central tendency of the measurement samples with the "half sample mode", - // which is more robust to outliers and skewed data than the mean or median. - --// WARNING if included from multiple translation units compiled with distinct --// flags: this header requires textual inclusion and a predefined NB_NAMESPACE --// macro that is unique to the current compile flags. We must also avoid --// standard library headers such as vector and functional that define functions. -- - #include - #include - -@@ -79,6 +74,16 @@ namespace platform { - // This call may be expensive, callers should cache the result. - double InvariantTicksPerSecond(); - -+// Returns current timestamp [in seconds] relative to an unspecified origin. -+// Features: monotonic (no negative elapsed time), steady (unaffected by system -+// time changes), high-resolution (on the order of microseconds). -+double Now(); -+ -+// Returns ticks elapsed in back to back timer calls, i.e. a function of the -+// timer resolution (minimum measurable difference) and overhead. -+// This call is expensive, callers should cache the result. -+uint64_t TimerResolution(); -+ - } // namespace platform - - // Returns 1, but without the compiler knowing what the value is. This prevents -diff -up chromium-92.0.4515.107/third_party/highway/src/hwy/nanobenchmark_test.cc.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/hwy/nanobenchmark_test.cc ---- chromium-92.0.4515.107/third_party/highway/src/hwy/nanobenchmark_test.cc.update-highway-0.12.2 2021-07-19 14:47:23.000000000 -0400 -+++ chromium-92.0.4515.107/third_party/highway/src/hwy/nanobenchmark_test.cc 2021-07-26 17:10:30.283171481 -0400 -@@ -15,11 +15,11 @@ - #include "hwy/nanobenchmark.h" - - #include --#include // strtol --#include // sleep - - #include - -+#include "hwy/tests/test_util-inl.h" -+ - namespace hwy { - namespace { - -@@ -31,6 +31,7 @@ FuncOutput Div(const void*, FuncInput in - - template - void MeasureDiv(const FuncInput (&inputs)[N]) { -+ printf("Measuring integer division (output on final two lines)\n"); - Result results[N]; - Params params; - params.max_evals = 4; // avoid test timeout -@@ -66,39 +67,14 @@ void MeasureRandom(const FuncInput (&inp - } - } - --template --void EnsureLongMeasurementFails(const FuncInput (&inputs)[N]) { -- printf("Expect a 'measurement failed' below:\n"); -- Result results[N]; -- -- const size_t num_results = Measure( -- [](const void*, const FuncInput input) -> FuncOutput { -- // Loop until the sleep succeeds (not interrupted by signal). We assume -- // >= 512 MHz, so 2 seconds will exceed the 1 << 30 tick safety limit. -- while (sleep(2) != 0) { -- } -- return input; -- }, -- nullptr, inputs, N, results); -- NANOBENCHMARK_CHECK(num_results == 0); -- (void)num_results; --} -- --void RunAll(const int argc, char** /*argv*/) { -- // unpredictable == 1 but the compiler doesn't know that. -- const int unpredictable = argc != 999; -+TEST(NanobenchmarkTest, RunAll) { -+ const int unpredictable = Unpredictable1(); // == 1, unknown to compiler. - static const FuncInput inputs[] = {static_cast(unpredictable) + 2, - static_cast(unpredictable + 9)}; - - MeasureDiv(inputs); - MeasureRandom(inputs); -- EnsureLongMeasurementFails(inputs); - } - - } // namespace - } // namespace hwy -- --int main(int argc, char* argv[]) { -- hwy::RunAll(argc, argv); -- return 0; --} -diff -up chromium-92.0.4515.107/third_party/highway/src/hwy/ops/arm_neon-inl.h.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/hwy/ops/arm_neon-inl.h ---- chromium-92.0.4515.107/third_party/highway/src/hwy/ops/arm_neon-inl.h.update-highway-0.12.2 2021-07-19 14:47:23.000000000 -0400 -+++ chromium-92.0.4515.107/third_party/highway/src/hwy/ops/arm_neon-inl.h 2021-07-26 17:20:19.294142914 -0400 -@@ -26,6 +26,8 @@ HWY_BEFORE_NAMESPACE(); - namespace hwy { - namespace HWY_NAMESPACE { - -+namespace detail { // for code folding and Raw128 -+ - // Macros used to define single and double function calls for multiple types - // for full and half vectors. These macros are undefined at the end of the file. - -@@ -437,12 +439,14 @@ struct Raw128 { - using type = int8x8_t; - }; - -+} // namespace detail -+ - template - using Full128 = Simd; - - template - class Vec128 { -- using Raw = typename Raw128::type; -+ using Raw = typename detail::Raw128::type; - - public: - HWY_INLINE Vec128() {} -@@ -480,7 +484,8 @@ class Vec128 { - // FF..FF or 0, also for floating-point - see README. - template - class Mask128 { -- using Raw = typename Raw128::type; -+ // ARM C Language Extensions return and expect unsigned type. -+ using Raw = typename detail::Raw128, N>::type; - - public: - HWY_INLINE Mask128() {} -@@ -664,15 +669,25 @@ template - HWY_INLINE Vec128 Undefined(Simd /*d*/) { - HWY_DIAGNOSTICS(push) - HWY_DIAGNOSTICS_OFF(disable : 4701, ignored "-Wuninitialized") -- typename Raw128::type a; -+ typename detail::Raw128::type a; - return Vec128(a); - HWY_DIAGNOSTICS(pop) - } - --// ------------------------------ Extract lane -+// Returns a vector with lane i=[0, N) set to "first" + i. -+template -+Vec128 Iota(const Simd d, const T2 first) { -+ HWY_ALIGN T lanes[16 / sizeof(T)]; -+ for (size_t i = 0; i < 16 / sizeof(T); ++i) { -+ lanes[i] = static_cast(first + static_cast(i)); -+ } -+ return Load(d, lanes); -+} -+ -+// ------------------------------ GetLane - - HWY_INLINE uint8_t GetLane(const Vec128 v) { -- return vget_lane_u8(vget_low_u8(v.raw), 0); -+ return vgetq_lane_u8(v.raw, 0); - } - template - HWY_INLINE uint8_t GetLane(const Vec128 v) { -@@ -680,7 +695,7 @@ HWY_INLINE uint8_t GetLane(const Vec128< - } - - HWY_INLINE int8_t GetLane(const Vec128 v) { -- return vget_lane_s8(vget_low_s8(v.raw), 0); -+ return vgetq_lane_s8(v.raw, 0); - } - template - HWY_INLINE int8_t GetLane(const Vec128 v) { -@@ -688,7 +703,7 @@ HWY_INLINE int8_t GetLane(const Vec128 v) { -- return vget_lane_u16(vget_low_u16(v.raw), 0); -+ return vgetq_lane_u16(v.raw, 0); - } - template - HWY_INLINE uint16_t GetLane(const Vec128 v) { -@@ -696,7 +711,7 @@ HWY_INLINE uint16_t GetLane(const Vec128 - } - - HWY_INLINE int16_t GetLane(const Vec128 v) { -- return vget_lane_s16(vget_low_s16(v.raw), 0); -+ return vgetq_lane_s16(v.raw, 0); - } - template - HWY_INLINE int16_t GetLane(const Vec128 v) { -@@ -704,7 +719,7 @@ HWY_INLINE int16_t GetLane(const Vec128< - } - - HWY_INLINE uint32_t GetLane(const Vec128 v) { -- return vget_lane_u32(vget_low_u32(v.raw), 0); -+ return vgetq_lane_u32(v.raw, 0); - } - template - HWY_INLINE uint32_t GetLane(const Vec128 v) { -@@ -712,7 +727,7 @@ HWY_INLINE uint32_t GetLane(const Vec128 - } - - HWY_INLINE int32_t GetLane(const Vec128 v) { -- return vget_lane_s32(vget_low_s32(v.raw), 0); -+ return vgetq_lane_s32(v.raw, 0); - } - template - HWY_INLINE int32_t GetLane(const Vec128 v) { -@@ -720,20 +735,20 @@ HWY_INLINE int32_t GetLane(const Vec128< - } - - HWY_INLINE uint64_t GetLane(const Vec128 v) { -- return vget_lane_u64(vget_low_u64(v.raw), 0); -+ return vgetq_lane_u64(v.raw, 0); - } - HWY_INLINE uint64_t GetLane(const Vec128 v) { - return vget_lane_u64(v.raw, 0); - } - HWY_INLINE int64_t GetLane(const Vec128 v) { -- return vget_lane_s64(vget_low_s64(v.raw), 0); -+ return vgetq_lane_s64(v.raw, 0); - } - HWY_INLINE int64_t GetLane(const Vec128 v) { - return vget_lane_s64(v.raw, 0); - } - - HWY_INLINE float GetLane(const Vec128 v) { -- return vget_lane_f32(vget_low_f32(v.raw), 0); -+ return vgetq_lane_f32(v.raw, 0); - } - HWY_INLINE float GetLane(const Vec128 v) { - return vget_lane_f32(v.raw, 0); -@@ -743,7 +758,7 @@ HWY_INLINE float GetLane(const Vec128 v) { -- return vget_lane_f64(vget_low_f64(v.raw), 0); -+ return vgetq_lane_f64(v.raw, 0); - } - HWY_INLINE double GetLane(const Vec128 v) { - return vget_lane_f64(v.raw, 0); -@@ -785,8 +800,6 @@ HWY_NEON_DEF_FUNCTION_INT_64(SaturatedSu - // ------------------------------ Average - - // Returns (a + b + 1) / 2 -- --// Unsigned - HWY_NEON_DEF_FUNCTION_UINT_8(AverageRound, vrhadd, _, 2) - HWY_NEON_DEF_FUNCTION_UINT_16(AverageRound, vrhadd, _, 2) - -@@ -802,6 +815,7 @@ HWY_INLINE Vec128 Abs(const Vec - HWY_INLINE Vec128 Abs(const Vec128 v) { - return Vec128(vabsq_s32(v.raw)); - } -+// i64 is implemented after BroadcastSignBit. - HWY_INLINE Vec128 Abs(const Vec128 v) { - return Vec128(vabsq_f32(v.raw)); - } -@@ -1184,21 +1198,34 @@ HWY_INLINE Vec128 ApproximateR - #if HWY_ARCH_ARM_A64 - HWY_NEON_DEF_FUNCTION_ALL_FLOATS(operator/, vdiv, _, 2) - #else --// Emulated with approx reciprocal + Newton-Raphson + mul -+// Not defined on armv7: approximate -+namespace detail { -+ -+HWY_INLINE Vec128 ReciprocalNewtonRaphsonStep( -+ const Vec128 recip, const Vec128 divisor) { -+ return Vec128(vrecpsq_f32(recip.raw, divisor.raw)); -+} -+template -+HWY_INLINE Vec128 ReciprocalNewtonRaphsonStep( -+ const Vec128 recip, Vec128 divisor) { -+ return Vec128(vrecps_f32(recip.raw, divisor.raw)); -+} -+ -+} // namespace detail -+ - template - HWY_INLINE Vec128 operator/(const Vec128 a, - const Vec128 b) { - auto x = ApproximateReciprocal(b); -- // Newton-Raphson on 1/x - b -- const auto two = Set(Simd(), 2); -- x = x * (two - b * x); -- x = x * (two - b * x); -- x = x * (two - b * x); -+ x *= detail::ReciprocalNewtonRaphsonStep(x, b); -+ x *= detail::ReciprocalNewtonRaphsonStep(x, b); -+ x *= detail::ReciprocalNewtonRaphsonStep(x, b); - return a * x; - } - #endif - --// Absolute value of difference. -+// ------------------------------ Absolute value of difference. -+ - HWY_INLINE Vec128 AbsDiff(const Vec128 a, const Vec128 b) { - return Vec128(vabdq_f32(a.raw, b.raw)); - } -@@ -1312,7 +1339,7 @@ HWY_INLINE Vec128 NegMulSub(c - } - #endif - --// ------------------------------ Floating-point square root -+// ------------------------------ Floating-point square root (IfThenZeroElse) - - // Approximate reciprocal square root - HWY_INLINE Vec128 ApproximateReciprocalSqrt(const Vec128 v) { -@@ -1328,77 +1355,33 @@ HWY_INLINE Vec128 ApproximateR - #if HWY_ARCH_ARM_A64 - HWY_NEON_DEF_FUNCTION_ALL_FLOATS(Sqrt, vsqrt, _, 1) - #else --// Not defined on armv7: emulate with approx reciprocal sqrt + Goldschmidt. --template --HWY_INLINE Vec128 Sqrt(const Vec128 v) { -- auto b = v; -- auto Y = ApproximateReciprocalSqrt(v); -- auto x = v * Y; -- const auto half = Set(Simd(), 0.5); -- const auto oneandhalf = Set(Simd(), 1.5); -- for (size_t i = 0; i < 3; i++) { -- b = b * Y * Y; -- Y = oneandhalf - half * b; -- x = x * Y; -- } -- return IfThenZeroElse(v == Zero(Simd()), x); --} --#endif -- --// ================================================== COMPARE -- --// Comparisons fill a lane with 1-bits if the condition is true, else 0. -+namespace detail { - --template --HWY_API Mask128 RebindMask(Simd /*tag*/, Mask128 m) { -- static_assert(sizeof(TFrom) == sizeof(TTo), "Must have same size"); -- return Mask128{m.raw}; -+HWY_INLINE Vec128 ReciprocalSqrtStep(const Vec128 root, -+ const Vec128 recip) { -+ return Vec128(vrsqrtsq_f32(root.raw, recip.raw)); -+} -+template -+HWY_INLINE Vec128 ReciprocalSqrtStep(const Vec128 root, -+ Vec128 recip) { -+ return Vec128(vrsqrts_f32(root.raw, recip.raw)); - } - --#define HWY_NEON_BUILD_TPL_HWY_COMPARE --#define HWY_NEON_BUILD_RET_HWY_COMPARE(type, size) Mask128 --#define HWY_NEON_BUILD_PARAM_HWY_COMPARE(type, size) \ -- const Vec128 a, const Vec128 b --#define HWY_NEON_BUILD_ARG_HWY_COMPARE a.raw, b.raw -- --// ------------------------------ Equality --HWY_NEON_DEF_FUNCTION_ALL_FLOATS(operator==, vceq, _, HWY_COMPARE) --#if HWY_ARCH_ARM_A64 --HWY_NEON_DEF_FUNCTION_INTS_UINTS(operator==, vceq, _, HWY_COMPARE) --#else --// No 64-bit comparisons on armv7: emulate them below, after Shuffle2301. --HWY_NEON_DEF_FUNCTION_INT_8_16_32(operator==, vceq, _, HWY_COMPARE) --HWY_NEON_DEF_FUNCTION_UINT_8_16_32(operator==, vceq, _, HWY_COMPARE) --#endif -+} // namespace detail - --// ------------------------------ Strict inequality -+// Not defined on armv7: approximate -+template -+HWY_INLINE Vec128 Sqrt(const Vec128 v) { -+ auto recip = ApproximateReciprocalSqrt(v); - --// Signed/float < (no unsigned) --#if HWY_ARCH_ARM_A64 --HWY_NEON_DEF_FUNCTION_INTS(operator<, vclt, _, HWY_COMPARE) --#else --HWY_NEON_DEF_FUNCTION_INT_8_16_32(operator<, vclt, _, HWY_COMPARE) --#endif --HWY_NEON_DEF_FUNCTION_ALL_FLOATS(operator<, vclt, _, HWY_COMPARE) -+ recip *= detail::ReciprocalSqrtStep(v * recip, recip); -+ recip *= detail::ReciprocalSqrtStep(v * recip, recip); -+ recip *= detail::ReciprocalSqrtStep(v * recip, recip); - --// Signed/float > (no unsigned) --#if HWY_ARCH_ARM_A64 --HWY_NEON_DEF_FUNCTION_INTS(operator>, vcgt, _, HWY_COMPARE) --#else --HWY_NEON_DEF_FUNCTION_INT_8_16_32(operator>, vcgt, _, HWY_COMPARE) -+ const auto root = v * recip; -+ return IfThenZeroElse(v == Zero(Simd()), root); -+} - #endif --HWY_NEON_DEF_FUNCTION_ALL_FLOATS(operator>, vcgt, _, HWY_COMPARE) -- --// ------------------------------ Weak inequality -- --// Float <= >= --HWY_NEON_DEF_FUNCTION_ALL_FLOATS(operator<=, vcle, _, HWY_COMPARE) --HWY_NEON_DEF_FUNCTION_ALL_FLOATS(operator>=, vcge, _, HWY_COMPARE) -- --#undef HWY_NEON_BUILD_TPL_HWY_COMPARE --#undef HWY_NEON_BUILD_RET_HWY_COMPARE --#undef HWY_NEON_BUILD_PARAM_HWY_COMPARE --#undef HWY_NEON_BUILD_ARG_HWY_COMPARE - - // ================================================== LOGICAL - -@@ -1407,13 +1390,16 @@ HWY_NEON_DEF_FUNCTION_ALL_FLOATS(operato - // There is no 64-bit vmvn, so cast instead of using HWY_NEON_DEF_FUNCTION. - template - HWY_INLINE Vec128 Not(const Vec128 v) { -- const Full128 d8; -- return Vec128(vmvnq_u8(BitCast(d8, v).raw)); -+ const Full128 d; -+ const Repartition d8; -+ return BitCast(d, Vec128(vmvnq_u8(BitCast(d8, v).raw))); - } - template - HWY_INLINE Vec128 Not(const Vec128 v) { -- const Repartition> d8; -- return Vec128(vmvn_u8(BitCast(d8, v).raw)); -+ const Simd d; -+ const Repartition d8; -+ using V8 = decltype(Zero(d8)); -+ return BitCast(d, V8(vmvn_u8(BitCast(d8, v).raw))); - } - - // ------------------------------ And -@@ -1513,33 +1499,38 @@ HWY_API Vec128 BroadcastSignBit(co - return ShiftRight(v); - } - --// ------------------------------ Make mask -+// ================================================== MASK - --template --HWY_INLINE Mask128 TestBit(Vec128 v, Vec128 bit) { -- static_assert(!hwy::IsFloat(), "Only integer vectors supported"); -- return (v & bit) == bit; --} -+// ------------------------------ To/from vector - --// Mask and Vec are the same (true = FF..FF). -+// Mask and Vec have the same representation (true = FF..FF). - template - HWY_INLINE Mask128 MaskFromVec(const Vec128 v) { -- return Mask128(v.raw); -+ const Simd, N> du; -+ return Mask128(BitCast(du, v).raw); - } - -+// DEPRECATED - template - HWY_INLINE Vec128 VecFromMask(const Mask128 v) { -- return Vec128(v.raw); -+ return BitCast(Simd(), Vec128, N>(v.raw)); - } - - template --HWY_INLINE Vec128 VecFromMask(Simd /* tag */, -- const Mask128 v) { -- return Vec128(v.raw); -+HWY_INLINE Vec128 VecFromMask(Simd d, const Mask128 v) { -+ return BitCast(d, Vec128, N>(v.raw)); -+} -+ -+// ------------------------------ RebindMask -+ -+template -+HWY_API Mask128 RebindMask(Simd dto, Mask128 m) { -+ static_assert(sizeof(TFrom) == sizeof(TTo), "Must have same size"); -+ return MaskFromVec(BitCast(dto, VecFromMask(Simd(), m))); - } - --// IfThenElse(mask, yes, no) --// Returns mask ? b : a. -+// ------------------------------ IfThenElse(mask, yes, no) = mask ? b : a. -+ - #define HWY_NEON_BUILD_TPL_HWY_IF - #define HWY_NEON_BUILD_RET_HWY_IF(type, size) Vec128 - #define HWY_NEON_BUILD_PARAM_HWY_IF(type, size) \ -@@ -1574,7 +1565,6 @@ HWY_INLINE Vec128 ZeroIfNegative(V - return Max(zero, v); - } - -- - // ------------------------------ Mask logical - - template -@@ -1607,30 +1597,183 @@ HWY_API Mask128 Xor(const Mask128< - return MaskFromVec(Xor(VecFromMask(d, a), VecFromMask(d, b))); - } - --// ------------------------------ Min (IfThenElse, BroadcastSignBit) -+// ================================================== COMPARE - --namespace detail { -+// Comparisons fill a lane with 1-bits if the condition is true, else 0. -+ -+// ------------------------------ Shuffle2301 (for i64 compares) -+ -+// Swap 32-bit halves in 64-bits -+HWY_INLINE Vec128 Shuffle2301(const Vec128 v) { -+ return Vec128(vrev64_u32(v.raw)); -+} -+HWY_INLINE Vec128 Shuffle2301(const Vec128 v) { -+ return Vec128(vrev64_s32(v.raw)); -+} -+HWY_INLINE Vec128 Shuffle2301(const Vec128 v) { -+ return Vec128(vrev64_f32(v.raw)); -+} -+HWY_INLINE Vec128 Shuffle2301(const Vec128 v) { -+ return Vec128(vrev64q_u32(v.raw)); -+} -+HWY_INLINE Vec128 Shuffle2301(const Vec128 v) { -+ return Vec128(vrev64q_s32(v.raw)); -+} -+HWY_INLINE Vec128 Shuffle2301(const Vec128 v) { -+ return Vec128(vrev64q_f32(v.raw)); -+} -+ -+#define HWY_NEON_BUILD_TPL_HWY_COMPARE -+#define HWY_NEON_BUILD_RET_HWY_COMPARE(type, size) Mask128 -+#define HWY_NEON_BUILD_PARAM_HWY_COMPARE(type, size) \ -+ const Vec128 a, const Vec128 b -+#define HWY_NEON_BUILD_ARG_HWY_COMPARE a.raw, b.raw - -+// ------------------------------ Equality -+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(operator==, vceq, _, HWY_COMPARE) - #if HWY_ARCH_ARM_A64 -+HWY_NEON_DEF_FUNCTION_INTS_UINTS(operator==, vceq, _, HWY_COMPARE) -+#else -+// No 64-bit comparisons on armv7: emulate them below, after Shuffle2301. -+HWY_NEON_DEF_FUNCTION_INT_8_16_32(operator==, vceq, _, HWY_COMPARE) -+HWY_NEON_DEF_FUNCTION_UINT_8_16_32(operator==, vceq, _, HWY_COMPARE) -+#endif - --HWY_INLINE Vec128 Gt(Vec128 a, Vec128 b) { -- return Vec128(vcgtq_u64(a.raw, b.raw)); -+// ------------------------------ Strict inequality (signed, float) -+#if HWY_ARCH_ARM_A64 -+HWY_NEON_DEF_FUNCTION_INTS(operator<, vclt, _, HWY_COMPARE) -+#else -+HWY_NEON_DEF_FUNCTION_INT_8_16_32(operator<, vclt, _, HWY_COMPARE) -+#endif -+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(operator<, vclt, _, HWY_COMPARE) -+ -+// ------------------------------ Weak inequality (float) -+HWY_NEON_DEF_FUNCTION_ALL_FLOATS(operator<=, vcle, _, HWY_COMPARE) -+ -+#undef HWY_NEON_BUILD_TPL_HWY_COMPARE -+#undef HWY_NEON_BUILD_RET_HWY_COMPARE -+#undef HWY_NEON_BUILD_PARAM_HWY_COMPARE -+#undef HWY_NEON_BUILD_ARG_HWY_COMPARE -+ -+// ------------------------------ ARMv7 i64 compare (Shuffle2301, Eq) -+ -+#if HWY_ARCH_ARM_V7 -+ -+template -+HWY_INLINE Mask128 operator==(const Vec128 a, -+ const Vec128 b) { -+ const Simd d32; -+ const Simd d64; -+ const auto cmp32 = VecFromMask(d32, Eq(BitCast(d32, a), BitCast(d32, b))); -+ const auto cmp64 = cmp32 & Shuffle2301(cmp32); -+ return MaskFromVec(BitCast(d64, cmp64)); - } --HWY_INLINE Vec128 Gt(Vec128 a, -- Vec128 b) { -- return Vec128(vcgt_u64(a.raw, b.raw)); -+ -+template -+HWY_INLINE Mask128 operator==(const Vec128 a, -+ const Vec128 b) { -+ const Simd d32; -+ const Simd d64; -+ const auto cmp32 = VecFromMask(d32, Eq(BitCast(d32, a), BitCast(d32, b))); -+ const auto cmp64 = cmp32 & Shuffle2301(cmp32); -+ return MaskFromVec(BitCast(d64, cmp64)); - } - --HWY_INLINE Vec128 Gt(Vec128 a, Vec128 b) { -- return Vec128(vcgtq_s64(a.raw, b.raw)); -+HWY_INLINE Mask128 operator<(const Vec128 a, -+ const Vec128 b) { -+ const int64x2_t sub = vqsubq_s64(a.raw, b.raw); -+ return MaskFromVec(BroadcastSignBit(Vec128(sub))); - } --HWY_INLINE Vec128 Gt(Vec128 a, Vec128 b) { -- return Vec128(vcgt_s64(a.raw, b.raw)); -+HWY_INLINE Mask128 operator<(const Vec128 a, -+ const Vec128 b) { -+ const int64x1_t sub = vqsub_s64(a.raw, b.raw); -+ return MaskFromVec(BroadcastSignBit(Vec128(sub))); - } - - #endif - --} // namespace detail -+// ------------------------------ Reversed comparisons -+ -+template -+HWY_API Mask128 operator>(Vec128 a, Vec128 b) { -+ return operator<(b, a); -+} -+template -+HWY_API Mask128 operator>=(Vec128 a, Vec128 b) { -+ return operator<=(b, a); -+} -+ -+// ------------------------------ FirstN (Iota, Lt) -+ -+template -+HWY_API Mask128 FirstN(const Simd d, size_t num) { -+ const RebindToSigned di; // Signed comparisons are cheaper. -+ return RebindMask(d, Iota(di, 0) < Set(di, static_cast>(num))); -+} -+ -+// ------------------------------ TestBit (Eq) -+ -+#define HWY_NEON_BUILD_TPL_HWY_TESTBIT -+#define HWY_NEON_BUILD_RET_HWY_TESTBIT(type, size) Mask128 -+#define HWY_NEON_BUILD_PARAM_HWY_TESTBIT(type, size) \ -+ Vec128 v, Vec128 bit -+#define HWY_NEON_BUILD_ARG_HWY_TESTBIT v.raw, bit.raw -+ -+#if HWY_ARCH_ARM_A64 -+HWY_NEON_DEF_FUNCTION_INTS_UINTS(TestBit, vtst, _, HWY_TESTBIT) -+#else -+// No 64-bit versions on armv7 -+HWY_NEON_DEF_FUNCTION_UINT_8_16_32(TestBit, vtst, _, HWY_TESTBIT) -+HWY_NEON_DEF_FUNCTION_INT_8_16_32(TestBit, vtst, _, HWY_TESTBIT) -+ -+template -+HWY_INLINE Mask128 TestBit(Vec128 v, -+ Vec128 bit) { -+ return (v & bit) == bit; -+} -+template -+HWY_INLINE Mask128 TestBit(Vec128 v, -+ Vec128 bit) { -+ return (v & bit) == bit; -+} -+ -+#endif -+#undef HWY_NEON_BUILD_TPL_HWY_TESTBIT -+#undef HWY_NEON_BUILD_RET_HWY_TESTBIT -+#undef HWY_NEON_BUILD_PARAM_HWY_TESTBIT -+#undef HWY_NEON_BUILD_ARG_HWY_TESTBIT -+ -+// ------------------------------ Abs i64 (IfThenElse, BroadcastSignBit) -+HWY_INLINE Vec128 Abs(const Vec128 v) { -+#if HWY_ARCH_ARM_A64 -+ return Vec128(vabsq_s64(v.raw)); -+#else -+ const auto zero = Zero(Full128()); -+ return IfThenElse(MaskFromVec(BroadcastSignBit(v)), zero - v, v); -+#endif -+} -+HWY_INLINE Vec128 Abs(const Vec128 v) { -+#if HWY_ARCH_ARM_A64 -+ return Vec128(vabs_s64(v.raw)); -+#else -+ const auto zero = Zero(Simd()); -+ return IfThenElse(MaskFromVec(BroadcastSignBit(v)), zero - v, v); -+#endif -+} -+ -+// ------------------------------ Min (IfThenElse, BroadcastSignBit) -+ -+#if HWY_ARCH_ARM_A64 -+ -+HWY_INLINE Mask128 operator<(Vec128 a, Vec128 b) { -+ return Mask128(vcltq_u64(a.raw, b.raw)); -+} -+HWY_INLINE Mask128 operator<(Vec128 a, -+ Vec128 b) { -+ return Mask128(vclt_u64(a.raw, b.raw)); -+} -+ -+#endif - - // Unsigned - HWY_NEON_DEF_FUNCTION_UINT_8_16_32(Min, vmin, _, 2) -@@ -1639,7 +1782,7 @@ template - HWY_INLINE Vec128 Min(const Vec128 a, - const Vec128 b) { - #if HWY_ARCH_ARM_A64 -- return IfThenElse(MaskFromVec(detail::Gt(a, b)), b, a); -+ return IfThenElse(b < a, b, a); - #else - const Simd du; - const Simd di; -@@ -1654,7 +1797,7 @@ template - HWY_INLINE Vec128 Min(const Vec128 a, - const Vec128 b) { - #if HWY_ARCH_ARM_A64 -- return IfThenElse(MaskFromVec(detail::Gt(a, b)), b, a); -+ return IfThenElse(b < a, b, a); - #else - const Vec128 sign = detail::SaturatedSub(a, b); - return IfThenElse(MaskFromVec(BroadcastSignBit(sign)), a, b); -@@ -1677,7 +1820,7 @@ template - HWY_INLINE Vec128 Max(const Vec128 a, - const Vec128 b) { - #if HWY_ARCH_ARM_A64 -- return IfThenElse(MaskFromVec(detail::Gt(a, b)), a, b); -+ return IfThenElse(b < a, a, b); - #else - const Simd du; - const Simd di; -@@ -1692,7 +1835,7 @@ template - HWY_INLINE Vec128 Max(const Vec128 a, - const Vec128 b) { - #if HWY_ARCH_ARM_A64 -- return IfThenElse(MaskFromVec(detail::Gt(a, b)), a, b); -+ return IfThenElse(b < a, a, b); - #else - const Vec128 sign = detail::SaturatedSub(a, b); - return IfThenElse(MaskFromVec(BroadcastSignBit(sign)), b, a); -@@ -1805,73 +1948,72 @@ HWY_INLINE Vec128 LoadU(Simd< - // we don't actually care what is in it, and we don't want - // to introduce extra overhead by initializing it to something. - --HWY_INLINE Vec128 LoadU(Simd d, -+HWY_INLINE Vec128 LoadU(Simd /*tag*/, - const uint8_t* HWY_RESTRICT p) { -- uint32x2_t a = Undefined(d).raw; -+ uint32x2_t a = Undefined(Simd()).raw; - uint32x2_t b = vld1_lane_u32(reinterpret_cast(p), a, 0); - return Vec128(vreinterpret_u8_u32(b)); - } --HWY_INLINE Vec128 LoadU(Simd d, -+HWY_INLINE Vec128 LoadU(Simd /*tag*/, - const uint16_t* HWY_RESTRICT p) { -- uint32x2_t a = Undefined(d).raw; -+ uint32x2_t a = Undefined(Simd()).raw; - uint32x2_t b = vld1_lane_u32(reinterpret_cast(p), a, 0); - return Vec128(vreinterpret_u16_u32(b)); - } --HWY_INLINE Vec128 LoadU(Simd d, -+HWY_INLINE Vec128 LoadU(Simd /*tag*/, - const uint32_t* HWY_RESTRICT p) { -- uint32x2_t a = Undefined(d).raw; -+ uint32x2_t a = Undefined(Simd()).raw; - uint32x2_t b = vld1_lane_u32(p, a, 0); - return Vec128(b); - } --HWY_INLINE Vec128 LoadU(Simd d, -+HWY_INLINE Vec128 LoadU(Simd /*tag*/, - const int8_t* HWY_RESTRICT p) { -- int32x2_t a = Undefined(d).raw; -+ int32x2_t a = Undefined(Simd()).raw; - int32x2_t b = vld1_lane_s32(reinterpret_cast(p), a, 0); - return Vec128(vreinterpret_s8_s32(b)); - } --HWY_INLINE Vec128 LoadU(Simd d, -+HWY_INLINE Vec128 LoadU(Simd /*tag*/, - const int16_t* HWY_RESTRICT p) { -- int32x2_t a = Undefined(d).raw; -+ int32x2_t a = Undefined(Simd()).raw; - int32x2_t b = vld1_lane_s32(reinterpret_cast(p), a, 0); - return Vec128(vreinterpret_s16_s32(b)); - } --HWY_INLINE Vec128 LoadU(Simd d, -+HWY_INLINE Vec128 LoadU(Simd /*tag*/, - const int32_t* HWY_RESTRICT p) { -- int32x2_t a = Undefined(d).raw; -+ int32x2_t a = Undefined(Simd()).raw; - int32x2_t b = vld1_lane_s32(p, a, 0); - return Vec128(b); - } --HWY_INLINE Vec128 LoadU(Simd d, -+HWY_INLINE Vec128 LoadU(Simd /*tag*/, - const float* HWY_RESTRICT p) { -- float32x2_t a = Undefined(d).raw; -+ float32x2_t a = Undefined(Simd()).raw; - float32x2_t b = vld1_lane_f32(p, a, 0); - return Vec128(b); - } - - // ------------------------------ Load 16 - --HWY_INLINE Vec128 LoadU(Simd d, -+HWY_INLINE Vec128 LoadU(Simd /*tag*/, - const uint8_t* HWY_RESTRICT p) { -- uint16x4_t a = Undefined(d).raw; -+ uint16x4_t a = Undefined(Simd()).raw; - uint16x4_t b = vld1_lane_u16(reinterpret_cast(p), a, 0); - return Vec128(vreinterpret_u8_u16(b)); - } --HWY_INLINE Vec128 LoadU(Simd d, -+HWY_INLINE Vec128 LoadU(Simd /*tag*/, - const uint16_t* HWY_RESTRICT p) { -- uint16x4_t a = Undefined(d).raw; -+ uint16x4_t a = Undefined(Simd()).raw; - uint16x4_t b = vld1_lane_u16(p, a, 0); - return Vec128(b); - } -- --HWY_INLINE Vec128 LoadU(Simd d, -+HWY_INLINE Vec128 LoadU(Simd /*tag*/, - const int8_t* HWY_RESTRICT p) { -- int16x4_t a = Undefined(d).raw; -+ int16x4_t a = Undefined(Simd()).raw; - int16x4_t b = vld1_lane_s16(reinterpret_cast(p), a, 0); - return Vec128(vreinterpret_s8_s16(b)); - } --HWY_INLINE Vec128 LoadU(Simd d, -+HWY_INLINE Vec128 LoadU(Simd /*tag*/, - const int16_t* HWY_RESTRICT p) { -- int16x4_t a = Undefined(d).raw; -+ int16x4_t a = Undefined(Simd()).raw; - int16x4_t b = vld1_lane_s16(p, a, 0); - return Vec128(b); - } -@@ -2009,12 +2151,12 @@ HWY_INLINE void StoreU(const Vec128 v, Simd, - uint8_t* HWY_RESTRICT p) { - uint32x2_t a = vreinterpret_u32_u8(v.raw); -- vst1_lane_u32(p, a, 0); -+ vst1_lane_u32(reinterpret_cast(p), a, 0); - } - HWY_INLINE void StoreU(const Vec128 v, Simd, - uint16_t* HWY_RESTRICT p) { - uint32x2_t a = vreinterpret_u32_u16(v.raw); -- vst1_lane_u32(p, a, 0); -+ vst1_lane_u32(reinterpret_cast(p), a, 0); - } - HWY_INLINE void StoreU(const Vec128 v, Simd, - uint32_t* HWY_RESTRICT p) { -@@ -2023,12 +2165,12 @@ HWY_INLINE void StoreU(const Vec128 v, Simd, - int8_t* HWY_RESTRICT p) { - int32x2_t a = vreinterpret_s32_s8(v.raw); -- vst1_lane_s32(p, a, 0); -+ vst1_lane_s32(reinterpret_cast(p), a, 0); - } - HWY_INLINE void StoreU(const Vec128 v, Simd, - int16_t* HWY_RESTRICT p) { - int32x2_t a = vreinterpret_s32_s16(v.raw); -- vst1_lane_s32(p, a, 0); -+ vst1_lane_s32(reinterpret_cast(p), a, 0); - } - HWY_INLINE void StoreU(const Vec128 v, Simd, - int32_t* HWY_RESTRICT p) { -@@ -2044,7 +2186,7 @@ HWY_INLINE void StoreU(const Vec128 v, Simd, - uint8_t* HWY_RESTRICT p) { - uint16x4_t a = vreinterpret_u16_u8(v.raw); -- vst1_lane_u16(p, a, 0); -+ vst1_lane_u16(reinterpret_cast(p), a, 0); - } - HWY_INLINE void StoreU(const Vec128 v, Simd, - uint16_t* HWY_RESTRICT p) { -@@ -2053,7 +2195,7 @@ HWY_INLINE void StoreU(const Vec128 v, Simd, - int8_t* HWY_RESTRICT p) { - int16x4_t a = vreinterpret_s16_s8(v.raw); -- vst1_lane_s16(p, a, 0); -+ vst1_lane_s16(reinterpret_cast(p), a, 0); - } - HWY_INLINE void StoreU(const Vec128 v, Simd, - int16_t* HWY_RESTRICT p) { -@@ -2118,18 +2260,18 @@ HWY_INLINE Vec128 PromoteTo(Fu - const Vec128 v) { - return Vec128(vmovl_u32(v.raw)); - } --HWY_INLINE Vec128 PromoteTo(Full128 /* tag */, -+HWY_INLINE Vec128 PromoteTo(Full128 d, - const Vec128 v) { -- return Vec128(vmovl_u8(v.raw)); -+ return BitCast(d, Vec128(vmovl_u8(v.raw))); - } --HWY_INLINE Vec128 PromoteTo(Full128 /* tag */, -+HWY_INLINE Vec128 PromoteTo(Full128 d, - const Vec128 v) { - uint16x8_t a = vmovl_u8(v.raw); -- return Vec128(vreinterpretq_s32_u16(vmovl_u16(vget_low_u16(a)))); -+ return BitCast(d, Vec128(vmovl_u16(vget_low_u16(a)))); - } --HWY_INLINE Vec128 PromoteTo(Full128 /* tag */, -+HWY_INLINE Vec128 PromoteTo(Full128 d, - const Vec128 v) { -- return Vec128(vmovl_u16(v.raw)); -+ return BitCast(d, Vec128(vmovl_u16(v.raw))); - } - - // Unsigned: zero-extend to half vector. -@@ -2155,9 +2297,9 @@ HWY_INLINE Vec128 PromoteTo - return Vec128(vget_low_u64(vmovl_u32(v.raw))); - } - template --HWY_INLINE Vec128 PromoteTo(Simd /* tag */, -+HWY_INLINE Vec128 PromoteTo(Simd d, - const Vec128 v) { -- return Vec128(vget_low_s16(vmovl_u8(v.raw))); -+ return BitCast(d, Vec128(vget_low_u16(vmovl_u8(v.raw)))); - } - template - HWY_INLINE Vec128 PromoteTo(Simd /* tag */, -@@ -2220,12 +2362,14 @@ HWY_INLINE Vec128 PromoteTo( - - HWY_INLINE Vec128 PromoteTo(Full128 /* tag */, - const Vec128 v) { -- return Vec128(vcvt_f32_f16(vreinterpret_f16_u16(v.raw))); -+ const float32x4_t f32 = vcvt_f32_f16(vreinterpret_f16_u16(v.raw)); -+ return Vec128(f32); - } - template - HWY_INLINE Vec128 PromoteTo(Simd /* tag */, - const Vec128 v) { -- return Vec128(vget_low_f32(vcvt_f32_f16(v.raw))); -+ const float32x4_t f32 = vcvt_f32_f16(vreinterpret_f16_u16(v.raw)); -+ return Vec128(vget_low_f32(f32)); - } - - #else -@@ -2353,7 +2497,8 @@ HWY_INLINE Vec128 DemoteTo - template - HWY_INLINE Vec128 DemoteTo(Simd /* tag */, - const Vec128 v) { -- return Vec128{vcvt_f16_f32(vcombine_f32(v.raw, v.raw))}; -+ const float16x4_t f16 = vcvt_f16_f32(vcombine_f32(v.raw, v.raw)); -+ return Vec128(vreinterpret_u16_f16(f16)); - } - - #else -@@ -2965,33 +3110,58 @@ HWY_INLINE Vec128 TableLookupBytes - BitCast(d8, from).raw))); - } - --// ------------------------------ Hard-coded shuffles -+// ------------------------------ TableLookupLanes - --// Notation: let Vec128 have lanes 3,2,1,0 (0 is least-significant). --// Shuffle0321 rotates one lane to the right (the previous least-significant --// lane is now most-significant). These could also be implemented via --// CombineShiftRightBytes but the shuffle_abcd notation is more convenient. -+// Returned by SetTableIndices for use by TableLookupLanes. -+template -+struct Indices128 { -+ typename detail::Raw128::type raw; -+}; - --// Swap 32-bit halves in 64-bits --HWY_INLINE Vec128 Shuffle2301(const Vec128 v) { -- return Vec128(vrev64_u32(v.raw)); --} --HWY_INLINE Vec128 Shuffle2301(const Vec128 v) { -- return Vec128(vrev64_s32(v.raw)); --} --HWY_INLINE Vec128 Shuffle2301(const Vec128 v) { -- return Vec128(vrev64_f32(v.raw)); -+template -+HWY_INLINE Indices128 SetTableIndices(Simd d, const int32_t* idx) { -+#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) -+ for (size_t i = 0; i < N; ++i) { -+ HWY_DASSERT(0 <= idx[i] && idx[i] < static_cast(N)); -+ } -+#endif -+ -+ const Repartition d8; -+ alignas(16) uint8_t control[16] = {0}; -+ for (size_t idx_lane = 0; idx_lane < N; ++idx_lane) { -+ for (size_t idx_byte = 0; idx_byte < sizeof(T); ++idx_byte) { -+ control[idx_lane * sizeof(T) + idx_byte] = -+ static_cast(idx[idx_lane] * sizeof(T) + idx_byte); -+ } -+ } -+ return Indices128{BitCast(d, Load(d8, control)).raw}; - } --HWY_INLINE Vec128 Shuffle2301(const Vec128 v) { -- return Vec128(vrev64q_u32(v.raw)); -+ -+template -+HWY_INLINE Vec128 TableLookupLanes( -+ const Vec128 v, const Indices128 idx) { -+ return TableLookupBytes(v, Vec128{idx.raw}); - } --HWY_INLINE Vec128 Shuffle2301(const Vec128 v) { -- return Vec128(vrev64q_s32(v.raw)); -+template -+HWY_INLINE Vec128 TableLookupLanes( -+ const Vec128 v, const Indices128 idx) { -+ return TableLookupBytes(v, Vec128{idx.raw}); - } --HWY_INLINE Vec128 Shuffle2301(const Vec128 v) { -- return Vec128(vrev64q_f32(v.raw)); -+template -+HWY_INLINE Vec128 TableLookupLanes(const Vec128 v, -+ const Indices128 idx) { -+ const Simd di; -+ const auto idx_i = BitCast(di, Vec128{idx.raw}); -+ return BitCast(Simd(), TableLookupBytes(BitCast(di, v), idx_i)); - } - -+// ------------------------------ Other shuffles (TableLookupBytes) -+ -+// Notation: let Vec128 have lanes 3,2,1,0 (0 is least-significant). -+// Shuffle0321 rotates one lane to the right (the previous least-significant -+// lane is now most-significant). These could also be implemented via -+// CombineShiftRightBytes but the shuffle_abcd notation is more convenient. -+ - // Swap 64-bit halves - template - HWY_INLINE Vec128 Shuffle1032(const Vec128 v) { -@@ -3029,49 +3199,6 @@ HWY_INLINE Vec128 Shuffle0123(const V - return TableLookupBytes(v, BitCast(d, Load(d8, bytes))); - } - --// ------------------------------ TableLookupLanes -- --// Returned by SetTableIndices for use by TableLookupLanes. --template --struct Indices128 { -- typename Raw128::type raw; --}; -- --template --HWY_INLINE Indices128 SetTableIndices(const Full128, const int32_t* idx) { --#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) -- const size_t N = 16 / sizeof(T); -- for (size_t i = 0; i < N; ++i) { -- HWY_DASSERT(0 <= idx[i] && idx[i] < static_cast(N)); -- } --#endif -- -- const Full128 d8; -- alignas(16) uint8_t control[16]; -- for (size_t idx_byte = 0; idx_byte < 16; ++idx_byte) { -- const size_t idx_lane = idx_byte / sizeof(T); -- const size_t mod = idx_byte % sizeof(T); -- control[idx_byte] = idx[idx_lane] * sizeof(T) + mod; -- } -- return Indices128{BitCast(Full128(), Load(d8, control)).raw}; --} -- --HWY_INLINE Vec128 TableLookupLanes(const Vec128 v, -- const Indices128 idx) { -- return TableLookupBytes(v, Vec128(idx.raw)); --} --HWY_INLINE Vec128 TableLookupLanes(const Vec128 v, -- const Indices128 idx) { -- return TableLookupBytes(v, Vec128(idx.raw)); --} --HWY_INLINE Vec128 TableLookupLanes(const Vec128 v, -- const Indices128 idx) { -- const Full128 di; -- const Full128 df; -- return BitCast(df, -- TableLookupBytes(BitCast(di, v), Vec128(idx.raw))); --} -- - // ------------------------------ Interleave lanes - - // Interleaves lanes from halves of the 128-bit blocks of "a" (which provides -@@ -3334,16 +3461,6 @@ HWY_INLINE Vec128 OddEven(const Vec12 - - // ================================================== MISC - --// Returns a vector with lane i=[0, N) set to "first" + i. --template --Vec128 Iota(const Simd d, const T2 first) { -- HWY_ALIGN T lanes[16 / sizeof(T)]; -- for (size_t i = 0; i < 16 / sizeof(T); ++i) { -- lanes[i] = static_cast(first + static_cast(i)); -- } -- return Load(d, lanes); --} -- - // ------------------------------ Scatter (Store) - - template -@@ -3413,52 +3530,44 @@ HWY_API Vec128 GatherIndex(const S - return Load(d, lanes); - } - --// ------------------------------ ARMv7 int64 comparisons (requires Shuffle2301) -+// ------------------------------ Reductions - --#if HWY_ARCH_ARM_V7 -+namespace detail { - --template --HWY_INLINE Mask128 operator==(const Vec128 a, -- const Vec128 b) { -- const Simd d32; -- const Simd d64; -- const auto cmp32 = VecFromMask(d32, BitCast(d32, a) == BitCast(d32, b)); -- const auto cmp64 = cmp32 & Shuffle2301(cmp32); -- return MaskFromVec(BitCast(d64, cmp64)); -+// N=1 for any T: no-op -+template -+HWY_API Vec128 SumOfLanes(const Vec128 v) { -+ return v; - } -- --template --HWY_INLINE Mask128 operator==(const Vec128 a, -- const Vec128 b) { -- const Simd d32; -- const Simd d64; -- const auto cmp32 = VecFromMask(d32, BitCast(d32, a) == BitCast(d32, b)); -- const auto cmp64 = cmp32 & Shuffle2301(cmp32); -- return MaskFromVec(BitCast(d64, cmp64)); -+template -+HWY_API Vec128 MinOfLanes(hwy::SizeTag /* tag */, -+ const Vec128 v) { -+ return v; -+} -+template -+HWY_API Vec128 MaxOfLanes(hwy::SizeTag /* tag */, -+ const Vec128 v) { -+ return v; - } - --HWY_INLINE Mask128 operator<(const Vec128 a, -- const Vec128 b) { -- const int64x2_t sub = vqsubq_s64(a.raw, b.raw); -- return MaskFromVec(BroadcastSignBit(Vec128(sub))); -+// u32/i32/f32: N=2 -+template -+HWY_API Vec128 SumOfLanes(const Vec128 v10) { -+ return v10 + Shuffle2301(v10); - } --HWY_INLINE Mask128 operator<(const Vec128 a, -- const Vec128 b) { -- const int64x1_t sub = vqsub_s64(a.raw, b.raw); -- return MaskFromVec(BroadcastSignBit(Vec128(sub))); -+template -+HWY_API Vec128 MinOfLanes(hwy::SizeTag<4> /* tag */, -+ const Vec128 v10) { -+ return Min(v10, Shuffle2301(v10)); - } -- --template --HWY_INLINE Mask128 operator>(const Vec128 a, -- const Vec128 b) { -- return b < a; -+template -+HWY_API Vec128 MaxOfLanes(hwy::SizeTag<4> /* tag */, -+ const Vec128 v10) { -+ return Max(v10, Shuffle2301(v10)); - } --#endif -- --// ------------------------------ Reductions - -+// full vectors - #if HWY_ARCH_ARM_A64 --// Supported for 32b and 64b vector types. Returns the sum in each lane. - HWY_INLINE Vec128 SumOfLanes(const Vec128 v) { - return Vec128(vdupq_n_u32(vaddvq_u32(v.raw))); - } -@@ -3505,20 +3614,15 @@ HWY_INLINE Vec128 SumOfLanes(co - } - #endif - --namespace detail { -- --// For u32/i32/f32. --template --HWY_API Vec128 MinOfLanes(hwy::SizeTag<4> /* tag */, -- const Vec128 v3210) { -+template -+HWY_API Vec128 MinOfLanes(hwy::SizeTag<4> /* tag */, const Vec128 v3210) { - const Vec128 v1032 = Shuffle1032(v3210); - const Vec128 v31_20_31_20 = Min(v3210, v1032); - const Vec128 v20_31_20_31 = Shuffle0321(v31_20_31_20); - return Min(v20_31_20_31, v31_20_31_20); - } --template --HWY_API Vec128 MaxOfLanes(hwy::SizeTag<4> /* tag */, -- const Vec128 v3210) { -+template -+HWY_API Vec128 MaxOfLanes(hwy::SizeTag<4> /* tag */, const Vec128 v3210) { - const Vec128 v1032 = Shuffle1032(v3210); - const Vec128 v31_20_31_20 = Max(v3210, v1032); - const Vec128 v20_31_20_31 = Shuffle0321(v31_20_31_20); -@@ -3526,15 +3630,13 @@ HWY_API Vec128 MaxOfLanes(hwy::Siz - } - - // For u64/i64[/f64]. --template --HWY_API Vec128 MinOfLanes(hwy::SizeTag<8> /* tag */, -- const Vec128 v10) { -+template -+HWY_API Vec128 MinOfLanes(hwy::SizeTag<8> /* tag */, const Vec128 v10) { - const Vec128 v01 = Shuffle01(v10); - return Min(v10, v01); - } --template --HWY_API Vec128 MaxOfLanes(hwy::SizeTag<8> /* tag */, -- const Vec128 v10) { -+template -+HWY_API Vec128 MaxOfLanes(hwy::SizeTag<8> /* tag */, const Vec128 v10) { - const Vec128 v01 = Shuffle01(v10); - return Max(v10, v01); - } -@@ -3542,6 +3644,10 @@ HWY_API Vec128 MaxOfLanes(hwy::Siz - } // namespace detail - - template -+HWY_API Vec128 SumOfLanes(const Vec128 v) { -+ return detail::SumOfLanes(v); -+} -+template - HWY_API Vec128 MinOfLanes(const Vec128 v) { - return detail::MinOfLanes(hwy::SizeTag(), v); - } -@@ -3569,13 +3675,13 @@ HWY_INLINE uint64_t BitsFromMask(hwy::Si - const uint8x8_t x2 = vget_low_u8(vpaddq_u8(values.raw, values.raw)); - const uint8x8_t x4 = vpadd_u8(x2, x2); - const uint8x8_t x8 = vpadd_u8(x4, x4); -- return vreinterpret_u16_u8(x8)[0]; -+ return vget_lane_u64(vreinterpret_u64_u8(x8), 0); - #else - // Don't have vpaddq, so keep doubling lane size. - const uint16x8_t x2 = vpaddlq_u8(values.raw); - const uint32x4_t x4 = vpaddlq_u16(x2); - const uint64x2_t x8 = vpaddlq_u32(x4); -- return (uint64_t(x8[1]) << 8) | x8[0]; -+ return (vgetq_lane_u64(x8, 1) << 8) | vgetq_lane_u64(x8, 0); - #endif - } - -@@ -3725,7 +3831,7 @@ HWY_INLINE size_t CountTrue(hwy::SizeTag - const int16x8_t x2 = vpaddlq_s8(ones); - const int32x4_t x4 = vpaddlq_s16(x2); - const int64x2_t x8 = vpaddlq_s32(x4); -- return x8[0] + x8[1]; -+ return vgetq_lane_s64(x8, 0) + vgetq_lane_s64(x8, 1); - #endif - } - template -@@ -3739,7 +3845,7 @@ HWY_INLINE size_t CountTrue(hwy::SizeTag - #else - const int32x4_t x2 = vpaddlq_s16(ones); - const int64x2_t x4 = vpaddlq_s32(x2); -- return x4[0] + x4[1]; -+ return vgetq_lane_s64(x4, 0) + vgetq_lane_s64(x4, 1); - #endif - } - -@@ -3753,7 +3859,7 @@ HWY_INLINE size_t CountTrue(hwy::SizeTag - return vaddvq_s32(ones); - #else - const int64x2_t x2 = vpaddlq_s32(ones); -- return x2[0] + x2[1]; -+ return vgetq_lane_s64(x2, 0) + vgetq_lane_s64(x2, 1); - #endif - } - -@@ -3765,10 +3871,10 @@ HWY_INLINE size_t CountTrue(hwy::SizeTag - vnegq_s64(BitCast(di, VecFromMask(Full128(), mask)).raw); - return vaddvq_s64(ones); - #else -- const Full128 di; -- const int64x2_t ones = -- vshrq_n_u64(BitCast(di, VecFromMask(Full128(), mask)).raw, 63); -- return ones[0] + ones[1]; -+ const Full128 du; -+ const auto mask_u = VecFromMask(du, RebindMask(du, mask)); -+ const uint64x2_t ones = vshrq_n_u64(mask_u.raw, 63); -+ return vgetq_lane_u64(ones, 0) + vgetq_lane_u64(ones, 1); - #endif - } - -@@ -3798,11 +3904,13 @@ HWY_INLINE size_t StoreMaskBits(const Ma - template - HWY_INLINE bool AllFalse(const Mask128 m) { - #if HWY_ARCH_ARM_A64 -- return (vmaxvq_u32(m.raw) == 0); -+ const Full128 d32; -+ const auto m32 = MaskFromVec(BitCast(d32, VecFromMask(Full128(), m))); -+ return (vmaxvq_u32(m32.raw) == 0); - #else - const auto v64 = BitCast(Full128(), VecFromMask(Full128(), m)); - uint32x2_t a = vqmovn_u64(v64.raw); -- return vreinterpret_u64_u32(a)[0] == 0; -+ return vget_lane_u64(vreinterpret_u64_u32(a), 0) == 0; - #endif - } - -@@ -4178,6 +4286,7 @@ HWY_API auto Le(V a, V b) -> decltype(a - return a <= b; - } - -+namespace detail { // for code folding - #if HWY_ARCH_ARM_V7 - #undef vuzp1_s8 - #undef vuzp1_u8 -@@ -4265,6 +4374,7 @@ HWY_API auto Le(V a, V b) -> decltype(a - #undef HWY_NEON_DEF_FUNCTION_UINT_8_16_32 - #undef HWY_NEON_DEF_FUNCTION_UINTS - #undef HWY_NEON_EVAL -+} // namespace detail - - // NOLINTNEXTLINE(google-readability-namespace-comments) - } // namespace HWY_NAMESPACE -diff -up chromium-92.0.4515.107/third_party/highway/src/hwy/ops/rvv-inl.h.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/hwy/ops/rvv-inl.h ---- chromium-92.0.4515.107/third_party/highway/src/hwy/ops/rvv-inl.h.update-highway-0.12.2 2021-07-19 14:47:23.000000000 -0400 -+++ chromium-92.0.4515.107/third_party/highway/src/hwy/ops/rvv-inl.h 2021-07-26 17:10:30.290171587 -0400 -@@ -39,6 +39,11 @@ using TFromV = TFromD>; - hwy::EnableIf>() && !IsFloat>()>* = nullptr - #define HWY_IF_FLOAT_V(V) hwy::EnableIf>()>* = nullptr - -+// kShift = log2 of multiplier: 0 for m1, 1 for m2, -2 for mf4 -+template -+using Full = Simd> (-kShift)) -+ : (HWY_LANES(T) << kShift)>; -+ - // ================================================== MACROS - - // Generate specializations and function definitions using X macros. Although -@@ -58,29 +63,30 @@ namespace detail { // for code folding - - // For given SEW, iterate over all LMUL. Precompute SEW/LMUL => MLEN because the - // preprocessor cannot easily do it. --#define HWY_RVV_FOREACH_08(X_MACRO, BASE, CHAR, NAME, OP) \ -- X_MACRO(BASE, CHAR, 8, 1, 8, NAME, OP) \ -- X_MACRO(BASE, CHAR, 8, 2, 4, NAME, OP) \ -- X_MACRO(BASE, CHAR, 8, 4, 2, NAME, OP) \ -- X_MACRO(BASE, CHAR, 8, 8, 1, NAME, OP) -- --#define HWY_RVV_FOREACH_16(X_MACRO, BASE, CHAR, NAME, OP) \ -- X_MACRO(BASE, CHAR, 16, 1, 16, NAME, OP) \ -- X_MACRO(BASE, CHAR, 16, 2, 8, NAME, OP) \ -- X_MACRO(BASE, CHAR, 16, 4, 4, NAME, OP) \ -- X_MACRO(BASE, CHAR, 16, 8, 2, NAME, OP) -- --#define HWY_RVV_FOREACH_32(X_MACRO, BASE, CHAR, NAME, OP) \ -- X_MACRO(BASE, CHAR, 32, 1, 32, NAME, OP) \ -- X_MACRO(BASE, CHAR, 32, 2, 16, NAME, OP) \ -- X_MACRO(BASE, CHAR, 32, 4, 8, NAME, OP) \ -- X_MACRO(BASE, CHAR, 32, 8, 4, NAME, OP) -- --#define HWY_RVV_FOREACH_64(X_MACRO, BASE, CHAR, NAME, OP) \ -- X_MACRO(BASE, CHAR, 64, 1, 64, NAME, OP) \ -- X_MACRO(BASE, CHAR, 64, 2, 32, NAME, OP) \ -- X_MACRO(BASE, CHAR, 64, 4, 16, NAME, OP) \ -- X_MACRO(BASE, CHAR, 64, 8, 8, NAME, OP) -+// TODO(janwas): GCC does not yet support fractional LMUL -+#define HWY_RVV_FOREACH_08(X_MACRO, BASE, CHAR, NAME, OP) \ -+ X_MACRO(BASE, CHAR, 8, m1, /*kShift=*/0, /*MLEN=*/8, NAME, OP) \ -+ X_MACRO(BASE, CHAR, 8, m2, /*kShift=*/1, /*MLEN=*/4, NAME, OP) \ -+ X_MACRO(BASE, CHAR, 8, m4, /*kShift=*/2, /*MLEN=*/2, NAME, OP) \ -+ X_MACRO(BASE, CHAR, 8, m8, /*kShift=*/3, /*MLEN=*/1, NAME, OP) -+ -+#define HWY_RVV_FOREACH_16(X_MACRO, BASE, CHAR, NAME, OP) \ -+ X_MACRO(BASE, CHAR, 16, m1, /*kShift=*/0, /*MLEN=*/16, NAME, OP) \ -+ X_MACRO(BASE, CHAR, 16, m2, /*kShift=*/1, /*MLEN=*/8, NAME, OP) \ -+ X_MACRO(BASE, CHAR, 16, m4, /*kShift=*/2, /*MLEN=*/4, NAME, OP) \ -+ X_MACRO(BASE, CHAR, 16, m8, /*kShift=*/3, /*MLEN=*/2, NAME, OP) -+ -+#define HWY_RVV_FOREACH_32(X_MACRO, BASE, CHAR, NAME, OP) \ -+ X_MACRO(BASE, CHAR, 32, m1, /*kShift=*/0, /*MLEN=*/32, NAME, OP) \ -+ X_MACRO(BASE, CHAR, 32, m2, /*kShift=*/1, /*MLEN=*/16, NAME, OP) \ -+ X_MACRO(BASE, CHAR, 32, m4, /*kShift=*/2, /*MLEN=*/8, NAME, OP) \ -+ X_MACRO(BASE, CHAR, 32, m8, /*kShift=*/3, /*MLEN=*/4, NAME, OP) -+ -+#define HWY_RVV_FOREACH_64(X_MACRO, BASE, CHAR, NAME, OP) \ -+ X_MACRO(BASE, CHAR, 64, m1, /*kShift=*/0, /*MLEN=*/64, NAME, OP) \ -+ X_MACRO(BASE, CHAR, 64, m2, /*kShift=*/1, /*MLEN=*/32, NAME, OP) \ -+ X_MACRO(BASE, CHAR, 64, m4, /*kShift=*/2, /*MLEN=*/16, NAME, OP) \ -+ X_MACRO(BASE, CHAR, 64, m8, /*kShift=*/3, /*MLEN=*/8, NAME, OP) - - // SEW for unsigned: - #define HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP) \ -@@ -153,63 +159,61 @@ namespace detail { // for code folding - - // Assemble types for use in x-macros - #define HWY_RVV_T(BASE, SEW) BASE##SEW##_t --#define HWY_RVV_D(CHAR, SEW, LMUL) D##CHAR##SEW##m##LMUL --#define HWY_RVV_V(BASE, SEW, LMUL) v##BASE##SEW##m##LMUL##_t -+#define HWY_RVV_D(CHAR, SEW, LMUL) D##CHAR##SEW##LMUL -+#define HWY_RVV_V(BASE, SEW, LMUL) v##BASE##SEW##LMUL##_t - #define HWY_RVV_M(MLEN) vbool##MLEN##_t - - } // namespace detail - - // TODO(janwas): remove typedefs and only use HWY_RVV_V etc. directly - --// TODO(janwas): do we want fractional LMUL? (can encode as negative) --// Mixed-precision code can use LMUL 1..8 and that should be enough unless they --// need many registers. --#define HWY_SPECIALIZE(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -- using HWY_RVV_D(CHAR, SEW, LMUL) = \ -- Simd; \ -- using V##CHAR##SEW##m##LMUL = HWY_RVV_V(BASE, SEW, LMUL); \ -- template <> \ -- struct DFromV_t { \ -- using Lane = HWY_RVV_T(BASE, SEW); \ -- using type = Simd; \ -+// Until we have full intrinsic support for fractional LMUL, mixed-precision -+// code can use LMUL 1..8 (adequate unless they need many registers). -+#define HWY_SPECIALIZE(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ -+ using HWY_RVV_D(CHAR, SEW, LMUL) = Full; \ -+ using V##CHAR##SEW##LMUL = HWY_RVV_V(BASE, SEW, LMUL); \ -+ template <> \ -+ struct DFromV_t { \ -+ using Lane = HWY_RVV_T(BASE, SEW); \ -+ using type = Full; \ - }; - using Vf16m1 = vfloat16m1_t; - using Vf16m2 = vfloat16m2_t; - using Vf16m4 = vfloat16m4_t; - using Vf16m8 = vfloat16m8_t; --using Df16m1 = Simd; --using Df16m2 = Simd; --using Df16m4 = Simd; --using Df16m8 = Simd; -+using Df16m1 = Full; -+using Df16m2 = Full; -+using Df16m4 = Full; -+using Df16m8 = Full; - - HWY_RVV_FOREACH(HWY_SPECIALIZE, _, _) - #undef HWY_SPECIALIZE - - // vector = f(d), e.g. Zero --#define HWY_RVV_RETV_ARGD(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -+#define HWY_RVV_RETV_ARGD(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ - HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_D(CHAR, SEW, LMUL) d) { \ - (void)Lanes(d); \ -- return v##OP##_##CHAR##SEW##m##LMUL(); \ -+ return v##OP##_##CHAR##SEW##LMUL(); \ - } - - // vector = f(vector), e.g. Not --#define HWY_RVV_RETV_ARGV(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -+#define HWY_RVV_RETV_ARGV(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ - HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \ -- return v##OP##_v_##CHAR##SEW##m##LMUL(v); \ -+ return v##OP##_v_##CHAR##SEW##LMUL(v); \ - } - - // vector = f(vector, scalar), e.g. detail::Add --#define HWY_RVV_RETV_ARGVS(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -- HWY_API HWY_RVV_V(BASE, SEW, LMUL) \ -- NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_T(BASE, SEW) b) { \ -- return v##OP##_##CHAR##SEW##m##LMUL(a, b); \ -+#define HWY_RVV_RETV_ARGVS(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ -+ HWY_API HWY_RVV_V(BASE, SEW, LMUL) \ -+ NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_T(BASE, SEW) b) { \ -+ return v##OP##_##CHAR##SEW##LMUL(a, b); \ - } - - // vector = f(vector, vector), e.g. Add --#define HWY_RVV_RETV_ARGVV(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -+#define HWY_RVV_RETV_ARGVV(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ - HWY_API HWY_RVV_V(BASE, SEW, LMUL) \ - NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_V(BASE, SEW, LMUL) b) { \ -- return v##OP##_vv_##CHAR##SEW##m##LMUL(a, b); \ -+ return v##OP##_vv_##CHAR##SEW##LMUL(a, b); \ - } - - // ================================================== INIT -@@ -218,9 +222,9 @@ HWY_RVV_FOREACH(HWY_SPECIALIZE, _, _) - - // WARNING: we want to query VLMAX/sizeof(T), but this actually changes VL! - // vlenb is not exposed through intrinsics and vreadvl is not VLMAX. --#define HWY_RVV_LANES(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -- HWY_API size_t NAME(HWY_RVV_D(CHAR, SEW, LMUL) /* d */) { \ -- return v##OP##SEW##m##LMUL(); \ -+#define HWY_RVV_LANES(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ -+ HWY_API size_t NAME(HWY_RVV_D(CHAR, SEW, LMUL) /* d */) { \ -+ return v##OP##SEW##LMUL(); \ - } - - HWY_RVV_FOREACH(HWY_RVV_LANES, Lanes, setvlmax_e) -@@ -233,19 +237,31 @@ HWY_RVV_FOREACH(HWY_RVV_RETV_ARGD, Zero, - template - using VFromD = decltype(Zero(D())); - -+// Partial -+template -+HWY_API VFromD> Zero(Simd /*tag*/) { -+ return Zero(Full()); -+} -+ - // ------------------------------ Set - // vector = f(d, scalar), e.g. Set --#define HWY_RVV_SET(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -+#define HWY_RVV_SET(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ - HWY_API HWY_RVV_V(BASE, SEW, LMUL) \ - NAME(HWY_RVV_D(CHAR, SEW, LMUL) d, HWY_RVV_T(BASE, SEW) arg) { \ - (void)Lanes(d); \ -- return v##OP##_##CHAR##SEW##m##LMUL(arg); \ -+ return v##OP##_##CHAR##SEW##LMUL(arg); \ - } - - HWY_RVV_FOREACH_UI(HWY_RVV_SET, Set, mv_v_x) - HWY_RVV_FOREACH_F(HWY_RVV_SET, Set, fmv_v_f) - #undef HWY_RVV_SET - -+// Partial vectors -+template -+HWY_API VFromD> Set(Simd /*tag*/, T arg) { -+ return Set(Full(), arg); -+} -+ - // ------------------------------ Undefined - - // RVV vundefined is 'poisoned' such that even XORing a _variable_ initialized -@@ -265,7 +281,7 @@ HWY_API VFromD Undefined(D d) { - namespace detail { - - // u8: no change --#define HWY_RVV_CAST_NOP(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -+#define HWY_RVV_CAST_NOP(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ - HWY_API HWY_RVV_V(BASE, SEW, LMUL) \ - BitCastToByte(HWY_RVV_V(BASE, SEW, LMUL) v) { \ - return v; \ -@@ -276,25 +292,25 @@ namespace detail { - } - - // Other integers --#define HWY_RVV_CAST_UI(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -- HWY_API vuint8m##LMUL##_t BitCastToByte(HWY_RVV_V(BASE, SEW, LMUL) v) { \ -- return v##OP##_v_##CHAR##SEW##m##LMUL##_u8m##LMUL(v); \ -- } \ -- HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte( \ -- HWY_RVV_D(CHAR, SEW, LMUL) /* d */, vuint8m##LMUL##_t v) { \ -- return v##OP##_v_u8m##LMUL##_##CHAR##SEW##m##LMUL(v); \ -+#define HWY_RVV_CAST_UI(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ -+ HWY_API vuint8##LMUL##_t BitCastToByte(HWY_RVV_V(BASE, SEW, LMUL) v) { \ -+ return v##OP##_v_##CHAR##SEW##LMUL##_u8##LMUL(v); \ -+ } \ -+ HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte( \ -+ HWY_RVV_D(CHAR, SEW, LMUL) /* d */, vuint8##LMUL##_t v) { \ -+ return v##OP##_v_u8##LMUL##_##CHAR##SEW##LMUL(v); \ - } - - // Float: first cast to/from unsigned --#define HWY_RVV_CAST_F(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -- HWY_API vuint8m##LMUL##_t BitCastToByte(HWY_RVV_V(BASE, SEW, LMUL) v) { \ -- return v##OP##_v_u##SEW##m##LMUL##_u8m##LMUL( \ -- v##OP##_v_f##SEW##m##LMUL##_u##SEW##m##LMUL(v)); \ -- } \ -- HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte( \ -- HWY_RVV_D(CHAR, SEW, LMUL) /* d */, vuint8m##LMUL##_t v) { \ -- return v##OP##_v_u##SEW##m##LMUL##_f##SEW##m##LMUL( \ -- v##OP##_v_u8m##LMUL##_u##SEW##m##LMUL(v)); \ -+#define HWY_RVV_CAST_F(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ -+ HWY_API vuint8##LMUL##_t BitCastToByte(HWY_RVV_V(BASE, SEW, LMUL) v) { \ -+ return v##OP##_v_u##SEW##LMUL##_u8##LMUL( \ -+ v##OP##_v_f##SEW##LMUL##_u##SEW##LMUL(v)); \ -+ } \ -+ HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte( \ -+ HWY_RVV_D(CHAR, SEW, LMUL) /* d */, vuint8##LMUL##_t v) { \ -+ return v##OP##_v_u##SEW##LMUL##_f##SEW##LMUL( \ -+ v##OP##_v_u8##LMUL##_u##SEW##LMUL(v)); \ - } - - HWY_RVV_FOREACH_U08(HWY_RVV_CAST_NOP, _, _) -@@ -315,6 +331,12 @@ HWY_API VFromD BitCast(D d, FromV v) - return detail::BitCastFromByte(d, detail::BitCastToByte(v)); - } - -+// Partial -+template -+HWY_API VFromD> BitCast(Simd /*tag*/, FromV v) { -+ return BitCast(Full(), v); -+} -+ - namespace detail { - - template >> -@@ -336,6 +358,12 @@ HWY_API VFromD Iota0(const D /*d*/) - return BitCastToUnsigned(Iota0(DU())); - } - -+// Partial -+template -+HWY_API VFromD> Iota0(Simd /*tag*/) { -+ return Iota0(Full()); -+} -+ - } // namespace detail - - // ================================================== LOGICAL -@@ -370,11 +398,11 @@ HWY_API V And(const V a, const V b) { - // ------------------------------ Or - - // Scalar argument plus mask. Used by VecFromMask. --#define HWY_RVV_OR_MASK(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -+#define HWY_RVV_OR_MASK(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ - HWY_API HWY_RVV_V(BASE, SEW, LMUL) \ - NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_T(BASE, SEW) imm, \ - HWY_RVV_M(MLEN) mask, HWY_RVV_V(BASE, SEW, LMUL) maskedoff) { \ -- return v##OP##_##CHAR##SEW##m##LMUL##_m(mask, maskedoff, v, imm); \ -+ return v##OP##_##CHAR##SEW##LMUL##_m(mask, maskedoff, v, imm); \ - } - - namespace detail { -@@ -466,14 +494,14 @@ HWY_RVV_FOREACH_U16(HWY_RVV_RETV_ARGVV, - // ------------------------------ ShiftLeft[Same] - - // Intrinsics do not define .vi forms, so use .vx instead. --#define HWY_RVV_SHIFT(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -- template \ -- HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \ -- return v##OP##_vx_##CHAR##SEW##m##LMUL(v, kBits); \ -- } \ -- HWY_API HWY_RVV_V(BASE, SEW, LMUL) \ -- NAME##Same(HWY_RVV_V(BASE, SEW, LMUL) v, int bits) { \ -- return v##OP##_vx_##CHAR##SEW##m##LMUL(v, static_cast(bits)); \ -+#define HWY_RVV_SHIFT(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ -+ template \ -+ HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \ -+ return v##OP##_vx_##CHAR##SEW##LMUL(v, kBits); \ -+ } \ -+ HWY_API HWY_RVV_V(BASE, SEW, LMUL) \ -+ NAME##Same(HWY_RVV_V(BASE, SEW, LMUL) v, int bits) { \ -+ return v##OP##_vx_##CHAR##SEW##LMUL(v, static_cast(bits)); \ - } - - HWY_RVV_FOREACH_UI(HWY_RVV_SHIFT, ShiftLeft, sll) -@@ -486,19 +514,18 @@ HWY_RVV_FOREACH_I(HWY_RVV_SHIFT, ShiftRi - #undef HWY_RVV_SHIFT - - // ------------------------------ Shl --#define HWY_RVV_SHIFT_VV(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -+#define HWY_RVV_SHIFT_VV(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ - HWY_API HWY_RVV_V(BASE, SEW, LMUL) \ - NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(BASE, SEW, LMUL) bits) { \ -- return v##OP##_vv_##CHAR##SEW##m##LMUL(v, bits); \ -+ return v##OP##_vv_##CHAR##SEW##LMUL(v, bits); \ - } - - HWY_RVV_FOREACH_U(HWY_RVV_SHIFT_VV, Shl, sll) - --#define HWY_RVV_SHIFT_II(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -+#define HWY_RVV_SHIFT_II(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ - HWY_API HWY_RVV_V(BASE, SEW, LMUL) \ - NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(BASE, SEW, LMUL) bits) { \ -- return v##OP##_vv_##CHAR##SEW##m##LMUL(v, \ -- detail::BitCastToUnsigned(bits)); \ -+ return v##OP##_vv_##CHAR##SEW##LMUL(v, detail::BitCastToUnsigned(bits)); \ - } - - HWY_RVV_FOREACH_I(HWY_RVV_SHIFT_II, Shl, sll) -@@ -569,11 +596,11 @@ HWY_API V ApproximateReciprocalSqrt(cons - - // ------------------------------ MulAdd - // Note: op is still named vv, not vvv. --#define HWY_RVV_FMA(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -+#define HWY_RVV_FMA(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ - HWY_API HWY_RVV_V(BASE, SEW, LMUL) \ - NAME(HWY_RVV_V(BASE, SEW, LMUL) mul, HWY_RVV_V(BASE, SEW, LMUL) x, \ - HWY_RVV_V(BASE, SEW, LMUL) add) { \ -- return v##OP##_vv_##CHAR##SEW##m##LMUL(add, mul, x); \ -+ return v##OP##_vv_##CHAR##SEW##LMUL(add, mul, x); \ - } - - HWY_RVV_FOREACH_F(HWY_RVV_FMA, MulAdd, fmacc) -@@ -596,11 +623,11 @@ HWY_RVV_FOREACH_F(HWY_RVV_FMA, NegMulSub - // of all bits; SLEN 8 / LMUL 4 = half of all bits. - - // mask = f(vector, vector) --#define HWY_RVV_RETM_ARGVV(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -+#define HWY_RVV_RETM_ARGVV(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ - HWY_API HWY_RVV_M(MLEN) \ - NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_V(BASE, SEW, LMUL) b) { \ - (void)Lanes(DFromV()); \ -- return v##OP##_vv_##CHAR##SEW##m##LMUL##_b##MLEN(a, b); \ -+ return v##OP##_vv_##CHAR##SEW##LMUL##_b##MLEN(a, b); \ - } - - // ------------------------------ Eq -@@ -675,11 +702,11 @@ HWY_RVV_FOREACH_B(HWY_RVV_RETM_ARGMM, Xo - #undef HWY_RVV_RETM_ARGMM - - // ------------------------------ IfThenElse --#define HWY_RVV_IF_THEN_ELSE(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -- HWY_API HWY_RVV_V(BASE, SEW, LMUL) \ -- NAME(HWY_RVV_M(MLEN) m, HWY_RVV_V(BASE, SEW, LMUL) yes, \ -- HWY_RVV_V(BASE, SEW, LMUL) no) { \ -- return v##OP##_vvm_##CHAR##SEW##m##LMUL(m, no, yes); \ -+#define HWY_RVV_IF_THEN_ELSE(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ -+ HWY_API HWY_RVV_V(BASE, SEW, LMUL) \ -+ NAME(HWY_RVV_M(MLEN) m, HWY_RVV_V(BASE, SEW, LMUL) yes, \ -+ HWY_RVV_V(BASE, SEW, LMUL) no) { \ -+ return v##OP##_vvm_##CHAR##SEW##LMUL(m, no, yes); \ - } - - HWY_RVV_FOREACH(HWY_RVV_IF_THEN_ELSE, IfThenElse, merge) -@@ -774,17 +801,17 @@ HWY_RVV_FOREACH_B(HWY_RVV_COUNT_TRUE, _, - - // ------------------------------ Load - --#define HWY_RVV_LOAD(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -- HWY_API HWY_RVV_V(BASE, SEW, LMUL) \ -- NAME(HWY_RVV_D(CHAR, SEW, LMUL) d, \ -- const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \ -- (void)Lanes(d); \ -- return v##OP##SEW##_v_##CHAR##SEW##m##LMUL(p); \ -+#define HWY_RVV_LOAD(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ -+ HWY_API HWY_RVV_V(BASE, SEW, LMUL) \ -+ NAME(HWY_RVV_D(CHAR, SEW, LMUL) d, \ -+ const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \ -+ (void)Lanes(d); \ -+ return v##OP##SEW##_v_##CHAR##SEW##LMUL(p); \ - } - HWY_RVV_FOREACH(HWY_RVV_LOAD, Load, le) - #undef HWY_RVV_LOAD - --// Partial load -+// Partial - template - HWY_API VFromD> Load(Simd d, const T* HWY_RESTRICT p) { - return Load(d, p); -@@ -800,16 +827,22 @@ HWY_API VFromD LoadU(D d, const TFrom - - // ------------------------------ Store - --#define HWY_RVV_RET_ARGVDP(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -- HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v, \ -- HWY_RVV_D(CHAR, SEW, LMUL) d, \ -- HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \ -- (void)Lanes(d); \ -- return v##OP##SEW##_v_##CHAR##SEW##m##LMUL(p, v); \ -+#define HWY_RVV_RET_ARGVDP(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ -+ HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v, \ -+ HWY_RVV_D(CHAR, SEW, LMUL) d, \ -+ HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \ -+ (void)Lanes(d); \ -+ return v##OP##SEW##_v_##CHAR##SEW##LMUL(p, v); \ - } - HWY_RVV_FOREACH(HWY_RVV_RET_ARGVDP, Store, se) - #undef HWY_RVV_RET_ARGVDP - -+// Partial -+template -+HWY_API void Store(VFromD> v, Simd d, T* HWY_RESTRICT p) { -+ return Store(v, Full(), p); -+} -+ - // ------------------------------ StoreU - - // RVV only requires lane alignment, not natural alignment of the entire vector. -@@ -963,67 +996,6 @@ HWY_API VFromD> Promote - return BitCast(d, PromoteTo(Simd(), v)); - } - --// ------------------------------ PromoteTo I -- --HWY_API Vi16m2 PromoteTo(Di16m2 /* d */, Vi8m1 v) { return vsext_vf2_i16m2(v); } --HWY_API Vi16m4 PromoteTo(Di16m4 /* d */, Vi8m2 v) { return vsext_vf2_i16m4(v); } --HWY_API Vi16m8 PromoteTo(Di16m8 /* d */, Vi8m4 v) { return vsext_vf2_i16m8(v); } -- --HWY_API Vi32m4 PromoteTo(Di32m4 /* d */, Vi8m1 v) { return vsext_vf4_i32m4(v); } --HWY_API Vi32m8 PromoteTo(Di32m8 /* d */, Vi8m2 v) { return vsext_vf4_i32m8(v); } -- --HWY_API Vi32m2 PromoteTo(Di32m2 /* d */, const Vi16m1 v) { -- return vsext_vf2_i32m2(v); --} --HWY_API Vi32m4 PromoteTo(Di32m4 /* d */, const Vi16m2 v) { -- return vsext_vf2_i32m4(v); --} --HWY_API Vi32m8 PromoteTo(Di32m8 /* d */, const Vi16m4 v) { -- return vsext_vf2_i32m8(v); --} -- --HWY_API Vi64m2 PromoteTo(Di64m2 /* d */, const Vi32m1 v) { -- return vsext_vf2_i64m2(v); --} --HWY_API Vi64m4 PromoteTo(Di64m4 /* d */, const Vi32m2 v) { -- return vsext_vf2_i64m4(v); --} --HWY_API Vi64m8 PromoteTo(Di64m8 /* d */, const Vi32m4 v) { -- return vsext_vf2_i64m8(v); --} -- --// ------------------------------ PromoteTo F -- --HWY_API Vf32m2 PromoteTo(Df32m2 /* d */, const Vf16m1 v) { -- return vfwcvt_f_f_v_f32m2(v); --} --HWY_API Vf32m4 PromoteTo(Df32m4 /* d */, const Vf16m2 v) { -- return vfwcvt_f_f_v_f32m4(v); --} --HWY_API Vf32m8 PromoteTo(Df32m8 /* d */, const Vf16m4 v) { -- return vfwcvt_f_f_v_f32m8(v); --} -- --HWY_API Vf64m2 PromoteTo(Df64m2 /* d */, const Vf32m1 v) { -- return vfwcvt_f_f_v_f64m2(v); --} --HWY_API Vf64m4 PromoteTo(Df64m4 /* d */, const Vf32m2 v) { -- return vfwcvt_f_f_v_f64m4(v); --} --HWY_API Vf64m8 PromoteTo(Df64m8 /* d */, const Vf32m4 v) { -- return vfwcvt_f_f_v_f64m8(v); --} -- --HWY_API Vf64m2 PromoteTo(Df64m2 /* d */, const Vi32m1 v) { -- return vfwcvt_f_x_v_f64m2(v); --} --HWY_API Vf64m4 PromoteTo(Df64m4 /* d */, const Vi32m2 v) { -- return vfwcvt_f_x_v_f64m4(v); --} --HWY_API Vf64m8 PromoteTo(Df64m8 /* d */, const Vi32m4 v) { -- return vfwcvt_f_x_v_f64m8(v); --} -- - // ------------------------------ DemoteTo U - - // First clamp negative numbers to zero to match x86 packus. -@@ -1124,19 +1096,19 @@ HWY_API Vi32m4 DemoteTo(Di32m4 /* d */, - - // ------------------------------ ConvertTo F - --#define HWY_RVV_CONVERT(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -+#define HWY_RVV_CONVERT(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ - HWY_API HWY_RVV_V(BASE, SEW, LMUL) ConvertTo( \ - HWY_RVV_D(CHAR, SEW, LMUL) /* d */, HWY_RVV_V(int, SEW, LMUL) v) { \ -- return vfcvt_f_x_v_f##SEW##m##LMUL(v); \ -+ return vfcvt_f_x_v_f##SEW##LMUL(v); \ - } \ - /* Truncates (rounds toward zero). */ \ - HWY_API HWY_RVV_V(int, SEW, LMUL) ConvertTo(HWY_RVV_D(i, SEW, LMUL) /* d */, \ - HWY_RVV_V(BASE, SEW, LMUL) v) { \ -- return vfcvt_rtz_x_f_v_i##SEW##m##LMUL(v); \ -+ return vfcvt_rtz_x_f_v_i##SEW##LMUL(v); \ - } \ - /* Uses default rounding mode. */ \ - HWY_API HWY_RVV_V(int, SEW, LMUL) NearestInt(HWY_RVV_V(BASE, SEW, LMUL) v) { \ -- return vfcvt_x_f_v_i##SEW##m##LMUL(v); \ -+ return vfcvt_x_f_v_i##SEW##LMUL(v); \ - } - - // API only requires f32 but we provide f64 for internal use (otherwise, it -@@ -1184,10 +1156,10 @@ HWY_API VFromD SetTableIndices(D d, - - // <32bit are not part of Highway API, but used in Broadcast. This limits VLMAX - // to 2048! We could instead use vrgatherei16. --#define HWY_RVV_TABLE(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -+#define HWY_RVV_TABLE(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ - HWY_API HWY_RVV_V(BASE, SEW, LMUL) \ - NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(uint, SEW, LMUL) idx) { \ -- return v##OP##_vv_##CHAR##SEW##m##LMUL(v, idx); \ -+ return v##OP##_vv_##CHAR##SEW##LMUL(v, idx); \ - } - - HWY_RVV_FOREACH(HWY_RVV_TABLE, TableLookupLanes, rgather) -@@ -1279,7 +1251,6 @@ HWY_API V OffsetsOf128BitBlocks(const D - using T = MakeUnsigned>; - return detail::And(iota0, static_cast(~(LanesPerBlock(d) - 1))); - } -- - } // namespace detail - - template -@@ -1307,9 +1278,9 @@ HWY_API V Broadcast(const V v) { - - // ------------------------------ GetLane - --#define HWY_RVV_GET_LANE(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -- HWY_API HWY_RVV_T(BASE, SEW) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \ -- return v##OP##_s_##CHAR##SEW##m##LMUL##_##CHAR##SEW(v); \ -+#define HWY_RVV_GET_LANE(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ -+ HWY_API HWY_RVV_T(BASE, SEW) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \ -+ return v##OP##_s_##CHAR##SEW##LMUL##_##CHAR##SEW(v); \ - } - - HWY_RVV_FOREACH_UI(HWY_RVV_GET_LANE, GetLane, mv_x) -@@ -1318,11 +1289,12 @@ HWY_RVV_FOREACH_F(HWY_RVV_GET_LANE, GetL - - // ------------------------------ ShiftLeftLanes - --// vector = f(vector, size_t) --#define HWY_RVV_SLIDE(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -- HWY_API HWY_RVV_V(BASE, SEW, LMUL) \ -- NAME(HWY_RVV_V(BASE, SEW, LMUL) v, size_t lanes) { \ -- return v##OP##_vx_##CHAR##SEW##m##LMUL(v, v, lanes); \ -+// vector = f(vector, vector, size_t) -+#define HWY_RVV_SLIDE(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ -+ HWY_API HWY_RVV_V(BASE, SEW, LMUL) \ -+ NAME(HWY_RVV_V(BASE, SEW, LMUL) dst, HWY_RVV_V(BASE, SEW, LMUL) src, \ -+ size_t lanes) { \ -+ return v##OP##_vx_##CHAR##SEW##LMUL(dst, src, lanes); \ - } - - namespace detail { -@@ -1333,7 +1305,7 @@ template - HWY_API V ShiftLeftLanes(const V v) { - using D = DFromV; - const RebindToSigned di; -- const auto shifted = detail::SlideUp(v, kLanes); -+ const auto shifted = detail::SlideUp(v, v, kLanes); - // Match x86 semantics by zeroing lower lanes in 128-bit blocks - constexpr size_t kLanesPerBlock = detail::LanesPerBlock(di); - const auto idx_mod = detail::And(detail::Iota0(di), kLanesPerBlock - 1); -@@ -1363,7 +1335,7 @@ template - HWY_API V ShiftRightLanes(const V v) { - using D = DFromV; - const RebindToSigned di; -- const auto shifted = detail::SlideDown(v, kLanes); -+ const auto shifted = detail::SlideDown(v, v, kLanes); - // Match x86 semantics by zeroing upper lanes in 128-bit blocks - constexpr size_t kLanesPerBlock = detail::LanesPerBlock(di); - const auto idx_mod = detail::And(detail::Iota0(di), kLanesPerBlock - 1); -@@ -1405,7 +1377,7 @@ HWY_API V ConcatUpperLower(const V hi, c - template - HWY_API V ConcatLowerLower(const V hi, const V lo) { - // Move lower half into upper -- const auto hi_up = detail::SlideUp(hi, Lanes(DFromV()) / 2); -+ const auto hi_up = detail::SlideUp(hi, hi, Lanes(DFromV()) / 2); - return ConcatUpperLower(hi_up, lo); - } - -@@ -1414,7 +1386,7 @@ HWY_API V ConcatLowerLower(const V hi, c - template - HWY_API V ConcatUpperUpper(const V hi, const V lo) { - // Move upper half into lower -- const auto lo_down = detail::SlideDown(lo, Lanes(DFromV()) / 2); -+ const auto lo_down = detail::SlideDown(lo, lo, Lanes(DFromV()) / 2); - return ConcatUpperLower(hi, lo_down); - } - -@@ -1423,8 +1395,8 @@ HWY_API V ConcatUpperUpper(const V hi, c - template - HWY_API V ConcatLowerUpper(const V hi, const V lo) { - // Move half of both inputs to the other half -- const auto hi_up = detail::SlideUp(hi, Lanes(DFromV()) / 2); -- const auto lo_down = detail::SlideDown(lo, Lanes(DFromV()) / 2); -+ const auto hi_up = detail::SlideUp(hi, hi, Lanes(DFromV()) / 2); -+ const auto lo_down = detail::SlideDown(lo, lo, Lanes(DFromV()) / 2); - return ConcatUpperLower(hi_up, lo_down); - } - -@@ -1491,61 +1463,55 @@ HWY_API V Combine(const V a, const V b) - // ================================================== REDUCE - - // vector = f(vector, zero_m1) --#define HWY_RVV_REDUCE(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -- HWY_API HWY_RVV_V(BASE, SEW, LMUL) \ -- NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(BASE, SEW, 1) v0) { \ -- vsetvlmax_e##SEW##m##LMUL(); \ -- return Set(HWY_RVV_D(CHAR, SEW, LMUL)(), \ -- GetLane(v##OP##_vs_##CHAR##SEW##m##LMUL##_##CHAR##SEW##m1( \ -- v0, v, v0))); \ -+#define HWY_RVV_REDUCE(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ -+ HWY_API HWY_RVV_V(BASE, SEW, LMUL) \ -+ NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(BASE, SEW, m1) v0) { \ -+ vsetvlmax_e##SEW##LMUL(); \ -+ return Set( \ -+ HWY_RVV_D(CHAR, SEW, LMUL)(), \ -+ GetLane(v##OP##_vs_##CHAR##SEW##LMUL##_##CHAR##SEW##m1(v0, v, v0))); \ - } - - // ------------------------------ SumOfLanes - - namespace detail { -- - HWY_RVV_FOREACH_UI(HWY_RVV_REDUCE, RedSum, redsum) - HWY_RVV_FOREACH_F(HWY_RVV_REDUCE, RedSum, fredsum) -- - } // namespace detail - - template - HWY_API V SumOfLanes(const V v) { - using T = TFromV; -- const auto v0 = Zero(Simd()); // always m1 -+ const auto v0 = Zero(Full()); // always m1 - return detail::RedSum(v, v0); - } - - // ------------------------------ MinOfLanes - namespace detail { -- - HWY_RVV_FOREACH_U(HWY_RVV_REDUCE, RedMin, redminu) - HWY_RVV_FOREACH_I(HWY_RVV_REDUCE, RedMin, redmin) - HWY_RVV_FOREACH_F(HWY_RVV_REDUCE, RedMin, fredmin) -- - } // namespace detail - - template - HWY_API V MinOfLanes(const V v) { - using T = TFromV; -- const Simd d1; // always m1 -+ const Full d1; // always m1 - const auto neutral = Set(d1, HighestValue()); - return detail::RedMin(v, neutral); - } - - // ------------------------------ MaxOfLanes - namespace detail { -- - HWY_RVV_FOREACH_U(HWY_RVV_REDUCE, RedMax, redmaxu) - HWY_RVV_FOREACH_I(HWY_RVV_REDUCE, RedMax, redmax) - HWY_RVV_FOREACH_F(HWY_RVV_REDUCE, RedMax, fredmax) -- - } // namespace detail - - template - HWY_API V MaxOfLanes(const V v) { - using T = TFromV; -- const Simd d1; // always m1 -+ const Full d1; // always m1 - const auto neutral = Set(d1, LowestValue()); - return detail::RedMax(v, neutral); - } -@@ -1570,7 +1536,7 @@ HWY_API VFromD LoadDup128(D d, const - #define HWY_RVV_STORE_MASK_BITS(MLEN, NAME, OP) \ - HWY_API size_t StoreMaskBits(HWY_RVV_M(MLEN) m, uint8_t* p) { \ - /* LMUL=1 is always enough */ \ -- Simd d8; \ -+ Full d8; \ - const size_t num_bytes = (Lanes(d8) + MLEN - 1) / MLEN; \ - /* TODO(janwas): how to convert vbool* to vuint?*/ \ - /*Store(m, d8, p);*/ \ -@@ -1581,6 +1547,22 @@ HWY_API VFromD LoadDup128(D d, const - HWY_RVV_FOREACH_B(HWY_RVV_STORE_MASK_BITS, _, _) - #undef HWY_RVV_STORE_MASK_BITS - -+// ------------------------------ FirstN (Iota0, Lt, RebindMask, SlideUp) -+ -+// Disallow for 8-bit because Iota is likely to overflow. -+template -+HWY_API MFromD FirstN(const D d, const size_t n) { -+ const RebindToSigned di; -+ return RebindMask(d, Lt(BitCast(di, detail::Iota0(d)), Set(di, n))); -+} -+ -+template -+HWY_API MFromD FirstN(const D d, const size_t n) { -+ const auto zero = Zero(d); -+ const auto one = Set(d, 1); -+ return Eq(detail::SlideUp(one, zero, n), one); -+} -+ - // ------------------------------ Neg - - template -@@ -1589,9 +1571,9 @@ HWY_API V Neg(const V v) { - } - - // vector = f(vector), but argument is repeated --#define HWY_RVV_RETV_ARGV2(BASE, CHAR, SEW, LMUL, MLEN, NAME, OP) \ -+#define HWY_RVV_RETV_ARGV2(BASE, CHAR, SEW, LMUL, SHIFT, MLEN, NAME, OP) \ - HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \ -- return v##OP##_vv_##CHAR##SEW##m##LMUL(v, v); \ -+ return v##OP##_vv_##CHAR##SEW##LMUL(v, v); \ - } - - HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGV2, Neg, fsgnjn) -@@ -1628,7 +1610,6 @@ template - HWY_API auto UseInt(const V v) -> decltype(MaskFromVec(v)) { - return Lt(Abs(v), Set(DFromV(), MantissaEnd>())); - } -- - } // namespace detail - - template -@@ -1699,10 +1680,8 @@ HWY_API VFromD Iota(const D d, TFromD - // Using vwmul does not work for m8, so use mulh instead. Highway only provides - // MulHigh for 16-bit, so use a private wrapper. - namespace detail { -- - HWY_RVV_FOREACH_U32(HWY_RVV_RETV_ARGVV, MulHigh, mulhu) - HWY_RVV_FOREACH_I32(HWY_RVV_RETV_ARGVV, MulHigh, mulh) -- - } // namespace detail - - template -@@ -1712,7 +1691,7 @@ HWY_API VFromD> dw; -- return BitCast(dw, OddEven(detail::SlideUp(hi, 1), lo)); -+ return BitCast(dw, OddEven(detail::SlideUp(hi, hi, 1), lo)); - } - - // ================================================== END MACROS -diff -up chromium-92.0.4515.107/third_party/highway/src/hwy/ops/x86_128-inl.h.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/hwy/ops/x86_128-inl.h ---- chromium-92.0.4515.107/third_party/highway/src/hwy/ops/x86_128-inl.h.update-highway-0.12.2 2021-07-19 14:47:23.000000000 -0400 -+++ chromium-92.0.4515.107/third_party/highway/src/hwy/ops/x86_128-inl.h 2021-07-26 17:19:52.153729522 -0400 -@@ -154,27 +154,28 @@ HWY_API Vec128 Zero(Simd - HWY_API Vec128 Set(Simd /* tag */, const uint8_t t) { -- return Vec128{_mm_set1_epi8(t)}; -+ return Vec128{_mm_set1_epi8(static_cast(t))}; // NOLINT - } - template - HWY_API Vec128 Set(Simd /* tag */, const uint16_t t) { -- return Vec128{_mm_set1_epi16(t)}; -+ return Vec128{_mm_set1_epi16(static_cast(t))}; // NOLINT - } - template - HWY_API Vec128 Set(Simd /* tag */, const uint32_t t) { -- return Vec128{_mm_set1_epi32(t)}; -+ return Vec128{_mm_set1_epi32(static_cast(t))}; - } - template - HWY_API Vec128 Set(Simd /* tag */, const uint64_t t) { -- return Vec128{_mm_set1_epi64x(t)}; -+ return Vec128{ -+ _mm_set1_epi64x(static_cast(t))}; // NOLINT - } - template - HWY_API Vec128 Set(Simd /* tag */, const int8_t t) { -- return Vec128{_mm_set1_epi8(t)}; -+ return Vec128{_mm_set1_epi8(static_cast(t))}; // NOLINT - } - template - HWY_API Vec128 Set(Simd /* tag */, const int16_t t) { -- return Vec128{_mm_set1_epi16(t)}; -+ return Vec128{_mm_set1_epi16(static_cast(t))}; // NOLINT - } - template - HWY_API Vec128 Set(Simd /* tag */, const int32_t t) { -@@ -182,7 +183,8 @@ HWY_API Vec128 Set(Simd - HWY_API Vec128 Set(Simd /* tag */, const int64_t t) { -- return Vec128{_mm_set1_epi64x(t)}; -+ return Vec128{ -+ _mm_set1_epi64x(static_cast(t))}; // NOLINT - } - template - HWY_API Vec128 Set(Simd /* tag */, const float t) { -@@ -684,6 +686,14 @@ HWY_API Mask128 operator>=(co - return Mask128{_mm_cmpge_pd(a.raw, b.raw)}; - } - -+// ------------------------------ FirstN (Iota, Lt) -+ -+template -+HWY_API Mask128 FirstN(const Simd d, size_t num) { -+ const RebindToSigned di; // Signed comparisons are cheaper. -+ return RebindMask(d, Iota(di, 0) < Set(di, static_cast>(num))); -+} -+ - // ================================================== ARITHMETIC - - // ------------------------------ Addition -@@ -895,7 +905,7 @@ template - HWY_API Vec128 Abs(const Vec128 v) { - return Vec128{_mm_abs_epi32(v.raw)}; - } -- -+// i64 is implemented after BroadcastSignBit. - template - HWY_API Vec128 Abs(const Vec128 v) { - const Vec128 mask{_mm_set1_epi32(0x7FFFFFFF)}; -@@ -1067,15 +1077,24 @@ HWY_API Vec128 BroadcastSign - return VecFromMask(v < Zero(Simd())); - #else - // Efficient Gt() requires SSE4.2 but we only have SSE4.1. BLENDVPD requires -- // two constants and domain crossing. 32-bit compare only requires Zero() -- // plus a shuffle to replicate the upper 32 bits. -+ // two constants and domain crossing. 32-bit shift avoids generating a zero. - const Simd d32; -- const auto sign = BitCast(d32, v) < Zero(d32); -+ const auto sign = ShiftRight<31>(BitCast(d32, v)); - return Vec128{ - _mm_shuffle_epi32(sign.raw, _MM_SHUFFLE(3, 3, 1, 1))}; - #endif - } - -+template -+HWY_API Vec128 Abs(const Vec128 v) { -+#if HWY_TARGET == HWY_AVX3 -+ return Vec128{_mm_abs_epi64(v.raw)}; -+#else -+ const auto zero = Zero(Simd()); -+ return IfThenElse(MaskFromVec(BroadcastSignBit(v)), zero - v, v); -+#endif -+} -+ - template - HWY_API Vec128 ShiftRight(const Vec128 v) { - #if HWY_TARGET == HWY_AVX3 -@@ -1787,6 +1806,10 @@ HWY_API void Stream(const Vec128 GatherIndex(Si - - #endif // HWY_TARGET != HWY_SSE4 - -+HWY_DIAGNOSTICS(pop) -+ - // ================================================== SWIZZLE - - // ------------------------------ Extract half -@@ -2075,10 +2100,10 @@ HWY_INLINE Vec128 UpperHalf(V - // ------------------------------ Shift vector by constant #bytes - - // 0x01..0F, kBytes = 1 => 0x02..0F00 --template --HWY_API Vec128 ShiftLeftBytes(const Vec128 v) { -+template -+HWY_API Vec128 ShiftLeftBytes(const Vec128 v) { - static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes"); -- return Vec128{_mm_slli_si128(v.raw, kBytes)}; -+ return Vec128{_mm_slli_si128(v.raw, kBytes)}; - } - - template -@@ -2089,10 +2114,10 @@ HWY_API Vec128 ShiftLeftLanes(cons - } - - // 0x01..0F, kBytes = 1 => 0x0001..0E --template --HWY_API Vec128 ShiftRightBytes(const Vec128 v) { -+template -+HWY_API Vec128 ShiftRightBytes(const Vec128 v) { - static_assert(0 <= kBytes && kBytes <= 16, "Invalid kBytes"); -- return Vec128{_mm_srli_si128(v.raw, kBytes)}; -+ return Vec128{_mm_srli_si128(v.raw, kBytes)}; - } - - template -@@ -2257,44 +2282,47 @@ HWY_API Vec128 Shuffle0123(const - // ------------------------------ TableLookupLanes - - // Returned by SetTableIndices for use by TableLookupLanes. --template -+template - struct Indices128 { - __m128i raw; - }; - --template --HWY_API Indices128 SetTableIndices(Full128, const int32_t* idx) { -+template -+HWY_API Indices128 SetTableIndices(Simd d, const int32_t* idx) { - #if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) -- const size_t N = 16 / sizeof(T); - for (size_t i = 0; i < N; ++i) { - HWY_DASSERT(0 <= idx[i] && idx[i] < static_cast(N)); - } - #endif - -- const Full128 d8; -- alignas(16) uint8_t control[16]; -- for (size_t idx_byte = 0; idx_byte < 16; ++idx_byte) { -- const size_t idx_lane = idx_byte / sizeof(T); -- const size_t mod = idx_byte % sizeof(T); -- control[idx_byte] = static_cast(idx[idx_lane] * sizeof(T) + mod); -+ const Repartition d8; -+ alignas(16) uint8_t control[16] = {0}; -+ for (size_t idx_lane = 0; idx_lane < N; ++idx_lane) { -+ for (size_t idx_byte = 0; idx_byte < sizeof(T); ++idx_byte) { -+ control[idx_lane * sizeof(T) + idx_byte] = -+ static_cast(idx[idx_lane] * sizeof(T) + idx_byte); -+ } - } -- return Indices128{Load(d8, control).raw}; -+ return Indices128{Load(d8, control).raw}; - } - --HWY_API Vec128 TableLookupLanes(const Vec128 v, -- const Indices128 idx) { -- return TableLookupBytes(v, Vec128{idx.raw}); -+template -+HWY_API Vec128 TableLookupLanes( -+ const Vec128 v, const Indices128 idx) { -+ return TableLookupBytes(v, Vec128{idx.raw}); - } --HWY_API Vec128 TableLookupLanes(const Vec128 v, -- const Indices128 idx) { -- return TableLookupBytes(v, Vec128{idx.raw}); -+template -+HWY_API Vec128 TableLookupLanes(const Vec128 v, -+ const Indices128 idx) { -+ return TableLookupBytes(v, Vec128{idx.raw}); - } --HWY_API Vec128 TableLookupLanes(const Vec128 v, -- const Indices128 idx) { -- const Full128 di; -- const Full128 df; -+template -+HWY_API Vec128 TableLookupLanes(const Vec128 v, -+ const Indices128 idx) { -+ const Simd di; -+ const Simd df; - return BitCast(df, -- TableLookupBytes(BitCast(di, v), Vec128{idx.raw})); -+ TableLookupBytes(BitCast(di, v), Vec128{idx.raw})); - } - - // ------------------------------ Interleave lanes -@@ -2502,47 +2530,47 @@ HWY_INLINE Vec128 ConcatUpperLow - - namespace detail { - --template --HWY_API Vec128 OddEven(hwy::SizeTag<1> /* tag */, const Vec128 a, -- const Vec128 b) { -- const Full128 d; -- const Full128 d8; -+template -+HWY_API Vec128 OddEven(hwy::SizeTag<1> /* tag */, const Vec128 a, -+ const Vec128 b) { -+ const Simd d; -+ const Repartition d8; - alignas(16) constexpr uint8_t mask[16] = {0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, - 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0}; - return IfThenElse(MaskFromVec(BitCast(d, Load(d8, mask))), b, a); - } --template --HWY_API Vec128 OddEven(hwy::SizeTag<2> /* tag */, const Vec128 a, -- const Vec128 b) { -- return Vec128{_mm_blend_epi16(a.raw, b.raw, 0x55)}; -+template -+HWY_API Vec128 OddEven(hwy::SizeTag<2> /* tag */, const Vec128 a, -+ const Vec128 b) { -+ return Vec128{_mm_blend_epi16(a.raw, b.raw, 0x55)}; - } --template --HWY_API Vec128 OddEven(hwy::SizeTag<4> /* tag */, const Vec128 a, -- const Vec128 b) { -- return Vec128{_mm_blend_epi16(a.raw, b.raw, 0x33)}; -+template -+HWY_API Vec128 OddEven(hwy::SizeTag<4> /* tag */, const Vec128 a, -+ const Vec128 b) { -+ return Vec128{_mm_blend_epi16(a.raw, b.raw, 0x33)}; - } --template --HWY_API Vec128 OddEven(hwy::SizeTag<8> /* tag */, const Vec128 a, -- const Vec128 b) { -- return Vec128{_mm_blend_epi16(a.raw, b.raw, 0x0F)}; -+template -+HWY_API Vec128 OddEven(hwy::SizeTag<8> /* tag */, const Vec128 a, -+ const Vec128 b) { -+ return Vec128{_mm_blend_epi16(a.raw, b.raw, 0x0F)}; - } - - } // namespace detail - --template --HWY_API Vec128 OddEven(const Vec128 a, const Vec128 b) { -+template -+HWY_API Vec128 OddEven(const Vec128 a, const Vec128 b) { - return detail::OddEven(hwy::SizeTag(), a, b); - } --template <> --HWY_INLINE Vec128 OddEven(const Vec128 a, -- const Vec128 b) { -- return Vec128{_mm_blend_ps(a.raw, b.raw, 5)}; -+template -+HWY_INLINE Vec128 OddEven(const Vec128 a, -+ const Vec128 b) { -+ return Vec128{_mm_blend_ps(a.raw, b.raw, 5)}; - } - --template <> --HWY_INLINE Vec128 OddEven(const Vec128 a, -- const Vec128 b) { -- return Vec128{_mm_blend_pd(a.raw, b.raw, 1)}; -+template -+HWY_INLINE Vec128 OddEven(const Vec128 a, -+ const Vec128 b) { -+ return Vec128{_mm_blend_pd(a.raw, b.raw, 1)}; - } - - // ------------------------------ Shl (ZipLower, Mul) -@@ -2980,7 +3008,7 @@ HWY_API Vec128 U8FromU32(con - return LowerHalf(LowerHalf(BitCast(d8, quad))); - } - --// ------------------------------ Convert integer <=> floating point -+// ------------------------------ Integer <=> fp (ShiftRight, OddEven) - - template - HWY_API Vec128 ConvertTo(Simd /* tag */, -@@ -2995,13 +3023,20 @@ HWY_API Vec128 ConvertTo(Simd - (void)dd; - return Vec128{_mm_cvtepi64_pd(v.raw)}; - #else -- alignas(16) int64_t lanes_i[2]; -- Store(v, Simd(), lanes_i); -- alignas(16) double lanes_d[2]; -- for (size_t i = 0; i < N; ++i) { -- lanes_d[i] = static_cast(lanes_i[i]); -- } -- return Load(dd, lanes_d); -+ // Based on wim's approach (https://stackoverflow.com/questions/41144668/) -+ const Repartition d32; -+ const Repartition d64; -+ -+ // Toggle MSB of lower 32-bits and insert exponent for 2^84 + 2^63 -+ const auto k84_63 = Set(d64, 0x4530000080000000ULL); -+ const auto v_upper = BitCast(dd, ShiftRight<32>(BitCast(d64, v)) ^ k84_63); -+ -+ // Exponent is 2^52, lower 32 bits from v (=> 32-bit OddEven) -+ const auto k52 = Set(d32, 0x43300000); -+ const auto v_lower = BitCast(dd, OddEven(k52, BitCast(d32, v))); -+ -+ const auto k84_63_52 = BitCast(dd, Set(d64, 0x4530000080100000ULL)); -+ return (v_upper - k84_63_52) + v_lower; // order matters! - #endif - } - -@@ -3572,55 +3607,87 @@ HWY_API void StoreInterleaved4(const Vec - - namespace detail { - --// For u32/i32/f32. --template --HWY_API Vec128 SumOfLanes(hwy::SizeTag<4> /* tag */, -- const Vec128 v3210) { -+// N=1 for any T: no-op -+template -+HWY_API Vec128 SumOfLanes(hwy::SizeTag /* tag */, -+ const Vec128 v) { -+ return v; -+} -+template -+HWY_API Vec128 MinOfLanes(hwy::SizeTag /* tag */, -+ const Vec128 v) { -+ return v; -+} -+template -+HWY_API Vec128 MaxOfLanes(hwy::SizeTag /* tag */, -+ const Vec128 v) { -+ return v; -+} -+ -+// u32/i32/f32: -+ -+// N=2 -+template -+HWY_API Vec128 SumOfLanes(hwy::SizeTag<4> /* tag */, -+ const Vec128 v10) { -+ return v10 + Vec128{Shuffle2301(Vec128{v10.raw}).raw}; -+} -+template -+HWY_API Vec128 MinOfLanes(hwy::SizeTag<4> /* tag */, -+ const Vec128 v10) { -+ return Min(v10, Vec128{Shuffle2301(Vec128{v10.raw}).raw}); -+} -+template -+HWY_API Vec128 MaxOfLanes(hwy::SizeTag<4> /* tag */, -+ const Vec128 v10) { -+ return Max(v10, Vec128{Shuffle2301(Vec128{v10.raw}).raw}); -+} -+ -+// N=4 (full) -+template -+HWY_API Vec128 SumOfLanes(hwy::SizeTag<4> /* tag */, const Vec128 v3210) { - const Vec128 v1032 = Shuffle1032(v3210); - const Vec128 v31_20_31_20 = v3210 + v1032; - const Vec128 v20_31_20_31 = Shuffle0321(v31_20_31_20); - return v20_31_20_31 + v31_20_31_20; - } --template --HWY_API Vec128 MinOfLanes(hwy::SizeTag<4> /* tag */, -- const Vec128 v3210) { -+template -+HWY_API Vec128 MinOfLanes(hwy::SizeTag<4> /* tag */, const Vec128 v3210) { - const Vec128 v1032 = Shuffle1032(v3210); - const Vec128 v31_20_31_20 = Min(v3210, v1032); - const Vec128 v20_31_20_31 = Shuffle0321(v31_20_31_20); - return Min(v20_31_20_31, v31_20_31_20); - } --template --HWY_API Vec128 MaxOfLanes(hwy::SizeTag<4> /* tag */, -- const Vec128 v3210) { -+template -+HWY_API Vec128 MaxOfLanes(hwy::SizeTag<4> /* tag */, const Vec128 v3210) { - const Vec128 v1032 = Shuffle1032(v3210); - const Vec128 v31_20_31_20 = Max(v3210, v1032); - const Vec128 v20_31_20_31 = Shuffle0321(v31_20_31_20); - return Max(v20_31_20_31, v31_20_31_20); - } - --// For u64/i64/f64. --template --HWY_API Vec128 SumOfLanes(hwy::SizeTag<8> /* tag */, -- const Vec128 v10) { -+// u64/i64/f64: -+ -+// N=2 (full) -+template -+HWY_API Vec128 SumOfLanes(hwy::SizeTag<8> /* tag */, const Vec128 v10) { - const Vec128 v01 = Shuffle01(v10); - return v10 + v01; - } --template --HWY_API Vec128 MinOfLanes(hwy::SizeTag<8> /* tag */, -- const Vec128 v10) { -+template -+HWY_API Vec128 MinOfLanes(hwy::SizeTag<8> /* tag */, const Vec128 v10) { - const Vec128 v01 = Shuffle01(v10); - return Min(v10, v01); - } --template --HWY_API Vec128 MaxOfLanes(hwy::SizeTag<8> /* tag */, -- const Vec128 v10) { -+template -+HWY_API Vec128 MaxOfLanes(hwy::SizeTag<8> /* tag */, const Vec128 v10) { - const Vec128 v01 = Shuffle01(v10); - return Max(v10, v01); - } - - } // namespace detail - --// Supported for u/i/f 32/64. Returns the sum in each lane. -+// Supported for u/i/f 32/64. Returns the same value in each lane. - template - HWY_API Vec128 SumOfLanes(const Vec128 v) { - return detail::SumOfLanes(hwy::SizeTag(), v); -diff -up chromium-92.0.4515.107/third_party/highway/src/hwy/ops/x86_256-inl.h.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/hwy/ops/x86_256-inl.h ---- chromium-92.0.4515.107/third_party/highway/src/hwy/ops/x86_256-inl.h.update-highway-0.12.2 2021-07-19 14:47:23.000000000 -0400 -+++ chromium-92.0.4515.107/third_party/highway/src/hwy/ops/x86_256-inl.h 2021-07-26 17:19:30.740403369 -0400 -@@ -20,15 +20,18 @@ - // particular, "Broadcast", pack and zip behavior may be surprising. - - #include // AVX2+ -+ - #if defined(_MSC_VER) && defined(__clang__) - // Including should be enough, but Clang's headers helpfully skip - // including these headers when _MSC_VER is defined, like when using clang-cl. - // Include these directly here. --#include - #include -+// avxintrin defines __m256i and must come before avx2intrin. - #include -+#include // _pext_u64 - #include - #include -+#include - #endif - - #include -@@ -159,23 +162,24 @@ HWY_API Vec256 Set(Full256{_mm256_set1_epi16(static_cast(t))}; // NOLINT - } - HWY_API Vec256 Set(Full256 /* tag */, const uint32_t t) { -- return Vec256{_mm256_set1_epi32(static_cast(t))}; // NOLINT -+ return Vec256{_mm256_set1_epi32(static_cast(t))}; - } - HWY_API Vec256 Set(Full256 /* tag */, const uint64_t t) { - return Vec256{ - _mm256_set1_epi64x(static_cast(t))}; // NOLINT - } - HWY_API Vec256 Set(Full256 /* tag */, const int8_t t) { -- return Vec256{_mm256_set1_epi8(t)}; -+ return Vec256{_mm256_set1_epi8(static_cast(t))}; // NOLINT - } - HWY_API Vec256 Set(Full256 /* tag */, const int16_t t) { -- return Vec256{_mm256_set1_epi16(t)}; -+ return Vec256{_mm256_set1_epi16(static_cast(t))}; // NOLINT - } - HWY_API Vec256 Set(Full256 /* tag */, const int32_t t) { - return Vec256{_mm256_set1_epi32(t)}; - } - HWY_API Vec256 Set(Full256 /* tag */, const int64_t t) { -- return Vec256{_mm256_set1_epi64x(t)}; -+ return Vec256{ -+ _mm256_set1_epi64x(static_cast(t))}; // NOLINT - } - HWY_API Vec256 Set(Full256 /* tag */, const float t) { - return Vec256{_mm256_set1_ps(t)}; -@@ -351,6 +355,8 @@ HWY_API Vec256 VecFromMask(Full256 - return Vec256{v.raw}; - } - -+// ------------------------------ IfThenElse -+ - // mask ? yes : no - template - HWY_API Vec256 IfThenElse(const Mask256 mask, const Vec256 yes, -@@ -681,6 +687,14 @@ HWY_API Vec256 Max(const Vec256< - return Vec256{_mm256_max_pd(a.raw, b.raw)}; - } - -+// ------------------------------ FirstN (Iota, Lt) -+ -+template -+HWY_API Mask256 FirstN(const Full256 d, size_t n) { -+ const RebindToSigned di; // Signed comparisons are cheaper. -+ return RebindMask(d, Iota(di, 0) < Set(di, static_cast>(n))); -+} -+ - // ================================================== ARITHMETIC - - // ------------------------------ Addition -@@ -843,7 +857,13 @@ HWY_API Vec256 AverageRound(co - - // Returns absolute value, except that LimitsMin() maps to LimitsMax() + 1. - HWY_API Vec256 Abs(const Vec256 v) { -+#if HWY_COMPILER_MSVC -+ // Workaround for incorrect codegen? (wrong result) -+ const auto zero = Zero(Full256()); -+ return Vec256{_mm256_max_epi8(v.raw, (zero - v).raw)}; -+#else - return Vec256{_mm256_abs_epi8(v.raw)}; -+#endif - } - HWY_API Vec256 Abs(const Vec256 v) { - return Vec256{_mm256_abs_epi16(v.raw)}; -@@ -851,6 +871,7 @@ HWY_API Vec256 Abs(const Vec256 - HWY_API Vec256 Abs(const Vec256 v) { - return Vec256{_mm256_abs_epi32(v.raw)}; - } -+// i64 is implemented after BroadcastSignBit. - - HWY_API Vec256 Abs(const Vec256 v) { - const Vec256 mask{_mm256_set1_epi32(0x7FFFFFFF)}; -@@ -1027,6 +1048,15 @@ HWY_API Vec256 ShiftRight(const - #endif - } - -+HWY_API Vec256 Abs(const Vec256 v) { -+#if HWY_TARGET == HWY_AVX3 -+ return Vec256{_mm256_abs_epi64(v.raw)}; -+#else -+ const auto zero = Zero(Full256()); -+ return IfThenElse(MaskFromVec(BroadcastSignBit(v)), zero - v, v); -+#endif -+} -+ - // ------------------------------ ShiftLeftSame - - HWY_API Vec256 ShiftLeftSame(const Vec256 v, -@@ -1398,6 +1428,10 @@ HWY_API void Stream(const Vec256 - - // ------------------------------ Scatter - -+// Work around warnings in the intrinsic definitions (passing -1 as a mask). -+HWY_DIAGNOSTICS(push) -+HWY_DIAGNOSTICS_OFF(disable : 4245 4365, ignored "-Wsign-conversion") -+ - #if HWY_TARGET == HWY_AVX3 - namespace detail { - -@@ -1584,6 +1618,8 @@ HWY_INLINE Vec256 GatherIndex{_mm256_i64gather_pd(base, index.raw, 8)}; - } - -+HWY_DIAGNOSTICS(pop) -+ - // ================================================== SWIZZLE - - template -@@ -2379,11 +2415,18 @@ HWY_API Vec128 DemoteTo(Full128< - _mm256_castsi256_si128(_mm256_permute4x64_epi64(i8, 0x88))}; - } - -+ // Avoid "value of intrinsic immediate argument '8' is out of range '0 - 7'". -+ // 8 is the correct value of _MM_FROUND_NO_EXC, which is allowed here. -+HWY_DIAGNOSTICS(push) -+HWY_DIAGNOSTICS_OFF(disable : 4556, ignored "-Wsign-conversion") -+ - HWY_API Vec128 DemoteTo(Full128 /* tag */, - const Vec256 v) { - return Vec128{_mm256_cvtps_ph(v.raw, _MM_FROUND_NO_EXC)}; - } - -+HWY_DIAGNOSTICS(pop) -+ - HWY_API Vec128 DemoteTo(Full128 /* tag */, - const Vec256 v) { - return Vec128{_mm256_cvtpd_ps(v.raw)}; -@@ -2409,7 +2452,7 @@ HWY_API Vec128 U8FromU32(con - return BitCast(Simd(), pair); - } - --// ------------------------------ Convert integer <=> floating point -+// ------------------------------ Integer <=> fp (ShiftRight, OddEven) - - HWY_API Vec256 ConvertTo(Full256 /* tag */, - const Vec256 v) { -@@ -2421,13 +2464,20 @@ HWY_API Vec256 ConvertTo(Full256 - (void)dd; - return Vec256{_mm256_cvtepi64_pd(v.raw)}; - #else -- alignas(32) int64_t lanes_i[4]; -- Store(v, Full256(), lanes_i); -- alignas(32) double lanes_d[4]; -- for (size_t i = 0; i < 4; ++i) { -- lanes_d[i] = static_cast(lanes_i[i]); -- } -- return Load(dd, lanes_d); -+ // Based on wim's approach (https://stackoverflow.com/questions/41144668/) -+ const Repartition d32; -+ const Repartition d64; -+ -+ // Toggle MSB of lower 32-bits and insert exponent for 2^84 + 2^63 -+ const auto k84_63 = Set(d64, 0x4530000080000000ULL); -+ const auto v_upper = BitCast(dd, ShiftRight<32>(BitCast(d64, v)) ^ k84_63); -+ -+ // Exponent is 2^52, lower 32 bits from v (=> 32-bit OddEven) -+ const auto k52 = Set(d32, 0x43300000); -+ const auto v_lower = BitCast(dd, OddEven(k52, BitCast(d32, v))); -+ -+ const auto k84_63_52 = BitCast(dd, Set(d64, 0x4530000080100000ULL)); -+ return (v_upper - k84_63_52) + v_lower; // order matters! - #endif - } - -@@ -2502,8 +2552,7 @@ HWY_API uint64_t BitsFromMask(hwy::SizeT - const auto compressed = - _mm256_permute4x64_epi64(sign_bits, _MM_SHUFFLE(3, 1, 2, 0)); - return static_cast(_mm256_movemask_epi8(compressed)); -- --#endif -+#endif // HWY_ARCH_X86_64 - } - - template -diff -up chromium-92.0.4515.107/third_party/highway/src/hwy/targets.cc.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/hwy/targets.cc ---- chromium-92.0.4515.107/third_party/highway/src/hwy/targets.cc.update-highway-0.12.2 2021-07-19 14:47:23.000000000 -0400 -+++ chromium-92.0.4515.107/third_party/highway/src/hwy/targets.cc 2021-07-26 17:17:24.610482240 -0400 -@@ -32,8 +32,8 @@ - #include - #else // HWY_COMPILER_MSVC - #include --#endif // HWY_COMPILER_MSVC --#endif -+#endif // HWY_COMPILER_MSVC -+#endif // HWY_ARCH_X86 - - namespace hwy { - namespace { -@@ -126,7 +126,7 @@ constexpr uint32_t kAVX512VL = 1u << 13; - constexpr uint32_t kAVX512DQ = 1u << 14; - constexpr uint32_t kAVX512BW = 1u << 15; - constexpr uint32_t kGroupAVX3 = kAVX512F | kAVX512VL | kAVX512DQ | kAVX512BW; --#endif -+#endif // HWY_ARCH_X86 - - } // namespace - -diff -up chromium-92.0.4515.107/third_party/highway/src/hwy/targets.h.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/hwy/targets.h ---- chromium-92.0.4515.107/third_party/highway/src/hwy/targets.h.update-highway-0.12.2 2021-07-19 14:47:23.000000000 -0400 -+++ chromium-92.0.4515.107/third_party/highway/src/hwy/targets.h 2021-07-26 17:17:24.610482240 -0400 -@@ -65,7 +65,9 @@ - // HWY_MAX_DYNAMIC_TARGETS in total. - #define HWY_HIGHEST_TARGET_BIT_X86 9 - --// 0x400, 0x800, 0x1000 reserved for SVE, SVE2, Helium -+#define HWY_SVE2 0x400 -+#define HWY_SVE 0x800 -+// 0x1000 reserved for Helium - #define HWY_NEON 0x2000 - - #define HWY_HIGHEST_TARGET_BIT_ARM 13 -@@ -90,6 +92,9 @@ - // 0x2000000, 0x4000000, 0x8000000, 0x10000000 reserved - - #define HWY_SCALAR 0x20000000 -+ -+#define HWY_HIGHEST_TARGET_BIT_SCALAR 29 -+ - // Cannot use higher values, otherwise HWY_TARGETS computation might overflow. - - //------------------------------------------------------------------------------ -@@ -106,25 +111,26 @@ - #ifndef HWY_BROKEN_TARGETS - - // x86 clang-6: we saw multiple AVX2/3 compile errors and in one case invalid --// SSE4 codegen (msan failure), so disable all those targets. -+// SSE4 codegen (possibly only for msan), so disable all those targets. - #if HWY_ARCH_X86 && (HWY_COMPILER_CLANG != 0 && HWY_COMPILER_CLANG < 700) --// TODO: Disable all non-scalar targets for every build target once we have --// clang-7 enabled in our builders. --#ifdef MEMORY_SANITIZER - #define HWY_BROKEN_TARGETS (HWY_SSE4 | HWY_AVX2 | HWY_AVX3) --#else --#define HWY_BROKEN_TARGETS 0 --#endif - // This entails a major speed reduction, so warn unless the user explicitly - // opts in to scalar-only. - #if !defined(HWY_COMPILE_ONLY_SCALAR) - #pragma message("x86 Clang <= 6: define HWY_COMPILE_ONLY_SCALAR or upgrade.") - #endif - --// MSVC, or 32-bit may fail to compile AVX2/3. --#elif HWY_COMPILER_MSVC != 0 || HWY_ARCH_X86_32 -+// 32-bit may fail to compile AVX2/3. -+#elif HWY_ARCH_X86_32 - #define HWY_BROKEN_TARGETS (HWY_AVX2 | HWY_AVX3) --#pragma message("Disabling AVX2/3 due to known issues with MSVC/32-bit builds") -+ -+// MSVC AVX3 support is buggy: https://github.com/Mysticial/Flops/issues/16 -+#elif HWY_COMPILER_MSVC != 0 -+#define HWY_BROKEN_TARGETS (HWY_AVX3) -+ -+// armv7be has not been tested and is not yet supported. -+#elif HWY_ARCH_ARM_V7 && (defined(__ARM_BIG_ENDIAN) || defined(__BIG_ENDIAN)) -+#define HWY_BROKEN_TARGETS (HWY_NEON) - - #else - #define HWY_BROKEN_TARGETS 0 -@@ -145,53 +151,74 @@ - // user to override this without any guarantee of success. - #ifndef HWY_BASELINE_TARGETS - --#ifdef __wasm_simd128__ -+// Also check HWY_ARCH to ensure that simulating unknown platforms ends up with -+// HWY_TARGET == HWY_SCALAR. -+ -+#if HWY_ARCH_WASM && defined(__wasm_simd128__) - #define HWY_BASELINE_WASM HWY_WASM - #else - #define HWY_BASELINE_WASM 0 - #endif - --#ifdef __VSX__ -+// Avoid choosing the PPC target until we have an implementation. -+#if HWY_ARCH_PPC && defined(__VSX__) && 0 - #define HWY_BASELINE_PPC8 HWY_PPC8 - #else - #define HWY_BASELINE_PPC8 0 - #endif - --// GCC 4.5.4 only defines the former; 5.4 defines both. --#if defined(__ARM_NEON__) || defined(__ARM_NEON) -+// Avoid choosing the SVE[2] targets the implementation is ready. -+#if HWY_ARCH_ARM && defined(__ARM_FEATURE_SVE2) && 0 -+#define HWY_BASELINE_SVE2 HWY_SVE2 -+#else -+#define HWY_BASELINE_SVE2 0 -+#endif -+ -+#if HWY_ARCH_ARM && defined(__ARM_FEATURE_SVE) && 0 -+#define HWY_BASELINE_SVE HWY_SVE -+#else -+#define HWY_BASELINE_SVE 0 -+#endif -+ -+// GCC 4.5.4 only defines __ARM_NEON__; 5.4 defines both. -+#if HWY_ARCH_ARM && (defined(__ARM_NEON__) || defined(__ARM_NEON)) - #define HWY_BASELINE_NEON HWY_NEON - #else - #define HWY_BASELINE_NEON 0 - #endif - --#ifdef __SSE4_1__ -+// MSVC does not set SSE4_1, but it does set AVX; checking for the latter means -+// we at least get SSE4 on machines supporting AVX but not AVX2. -+// https://stackoverflow.com/questions/18563978/ -+#if HWY_ARCH_X86 && \ -+ (defined(__SSE4_1__) || (HWY_COMPILER_MSVC != 0 && defined(__AVX__))) - #define HWY_BASELINE_SSE4 HWY_SSE4 - #else - #define HWY_BASELINE_SSE4 0 - #endif - --#ifdef __AVX2__ -+#if HWY_ARCH_X86 && defined(__AVX2__) - #define HWY_BASELINE_AVX2 HWY_AVX2 - #else - #define HWY_BASELINE_AVX2 0 - #endif - --#ifdef __AVX512F__ -+#if HWY_ARCH_X86 && defined(__AVX512F__) - #define HWY_BASELINE_AVX3 HWY_AVX3 - #else - #define HWY_BASELINE_AVX3 0 - #endif - --#ifdef __riscv_vector -+#if HWY_ARCH_RVV && defined(__riscv_vector) - #define HWY_BASELINE_RVV HWY_RVV - #else - #define HWY_BASELINE_RVV 0 - #endif - - #define HWY_BASELINE_TARGETS \ -- (HWY_SCALAR | HWY_BASELINE_WASM | HWY_BASELINE_PPC8 | HWY_BASELINE_NEON | \ -- HWY_BASELINE_SSE4 | HWY_BASELINE_AVX2 | HWY_BASELINE_AVX3 | \ -- HWY_BASELINE_RVV) -+ (HWY_SCALAR | HWY_BASELINE_WASM | HWY_BASELINE_PPC8 | HWY_BASELINE_SVE2 | \ -+ HWY_BASELINE_SVE | HWY_BASELINE_NEON | HWY_BASELINE_SSE4 | \ -+ HWY_BASELINE_AVX2 | HWY_BASELINE_AVX3 | HWY_BASELINE_RVV) - - #endif // HWY_BASELINE_TARGETS - -@@ -242,13 +269,12 @@ - #define HWY_TARGETS HWY_STATIC_TARGET - - // 3) For tests: include all attainable targets (in particular: scalar) --#elif defined(HWY_COMPILE_ALL_ATTAINABLE) -+#elif defined(HWY_COMPILE_ALL_ATTAINABLE) || defined(HWY_IS_TEST) - #define HWY_TARGETS HWY_ATTAINABLE_TARGETS - - // 4) Default: attainable WITHOUT non-best baseline. This reduces code size by - // excluding superseded targets, in particular scalar. - #else -- - #define HWY_TARGETS (HWY_ATTAINABLE_TARGETS & (2 * HWY_STATIC_TARGET - 1)) - - #endif // target policy -@@ -323,6 +349,10 @@ static inline HWY_MAYBE_UNUSED const cha - #endif - - #if HWY_ARCH_ARM -+ case HWY_SVE2: -+ return "SVE2"; -+ case HWY_SVE: -+ return "SVE"; - case HWY_NEON: - return "Neon"; - #endif -@@ -346,7 +376,7 @@ static inline HWY_MAYBE_UNUSED const cha - return "Scalar"; - - default: -- return "?"; -+ return "Unknown"; // must satisfy gtest IsValidParamName() - } - } - -@@ -405,21 +435,17 @@ static inline HWY_MAYBE_UNUSED const cha - nullptr, /* SSE3 */ \ - nullptr /* SSE2 */ - --#endif // HWY_ARCH_X86 -- --#if HWY_ARCH_ARM -+#elif HWY_ARCH_ARM - // See HWY_ARCH_X86 above for details. - #define HWY_MAX_DYNAMIC_TARGETS 4 - #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_ARM - #define HWY_CHOOSE_TARGET_LIST(func_name) \ -- nullptr, /* reserved */ \ -- nullptr, /* reserved */ \ -+ HWY_CHOOSE_SVE2(func_name), /* SVE2 */ \ -+ HWY_CHOOSE_SVE(func_name), /* SVE */ \ - nullptr, /* reserved */ \ - HWY_CHOOSE_NEON(func_name) /* NEON */ - --#endif // HWY_ARCH_ARM -- --#if HWY_ARCH_PPC -+#elif HWY_ARCH_PPC - // See HWY_ARCH_X86 above for details. - #define HWY_MAX_DYNAMIC_TARGETS 5 - #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_PPC -@@ -430,9 +456,7 @@ static inline HWY_MAYBE_UNUSED const cha - nullptr, /* VSX */ \ - nullptr /* AltiVec */ - --#endif // HWY_ARCH_PPC -- --#if HWY_ARCH_WASM -+#elif HWY_ARCH_WASM - // See HWY_ARCH_X86 above for details. - #define HWY_MAX_DYNAMIC_TARGETS 4 - #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_WASM -@@ -442,9 +466,7 @@ static inline HWY_MAYBE_UNUSED const cha - nullptr, /* reserved */ \ - HWY_CHOOSE_WASM(func_name) /* WASM */ - --#endif // HWY_ARCH_WASM -- --#if HWY_ARCH_RVV -+#elif HWY_ARCH_RVV - // See HWY_ARCH_X86 above for details. - #define HWY_MAX_DYNAMIC_TARGETS 4 - #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_RVV -@@ -454,7 +476,12 @@ static inline HWY_MAYBE_UNUSED const cha - nullptr, /* reserved */ \ - HWY_CHOOSE_RVV(func_name) /* RVV */ - --#endif // HWY_ARCH_RVV -+#else -+// Unknown architecture, will use HWY_SCALAR without dynamic dispatch, though -+// still creating single-entry tables in HWY_EXPORT to ensure portability. -+#define HWY_MAX_DYNAMIC_TARGETS 1 -+#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_SCALAR -+#endif - - struct ChosenTarget { - public: -diff -up chromium-92.0.4515.107/third_party/highway/src/hwy/tests/memory_test.cc.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/hwy/tests/memory_test.cc ---- chromium-92.0.4515.107/third_party/highway/src/hwy/tests/memory_test.cc.update-highway-0.12.2 2021-07-19 14:47:23.000000000 -0400 -+++ chromium-92.0.4515.107/third_party/highway/src/hwy/tests/memory_test.cc 2021-07-26 17:10:40.022319820 -0400 -@@ -12,6 +12,12 @@ - // See the License for the specific language governing permissions and - // limitations under the License. - -+// Ensure incompabilities with Windows macros (e.g. #define StoreFence) are -+// detected. Must come before Highway headers. -+#if defined(_WIN32) || defined(_WIN64) -+#include -+#endif -+ - #include - #include - -@@ -199,13 +205,14 @@ struct TestLoadDup128 { - for (size_t i = 0; i < N128; ++i) { - lanes[i] = static_cast(1 + i); - } -- const auto v = LoadDup128(d, lanes); -+ - const size_t N = Lanes(d); -- auto out = AllocateAligned(N); -- Store(v, d, out.get()); -+ auto expected = AllocateAligned(N); - for (size_t i = 0; i < N; ++i) { -- HWY_ASSERT_EQ(T(i % N128 + 1), out[i]); -+ expected[i] = static_cast(i % N128 + 1); - } -+ -+ HWY_ASSERT_VEC_EQ(d, expected.get(), LoadDup128(d, lanes)); - #else - (void)d; - #endif -@@ -327,6 +334,84 @@ HWY_NOINLINE void TestAllScatter() { - ForFloatTypes(test); - } - -+// Assumes little-endian byte order! -+struct TestScatter { -+ template -+ HWY_NOINLINE void operator()(T /*unused*/, D d) { -+ using Offset = MakeSigned; -+ -+ const size_t N = Lanes(d); -+ const size_t range = 4 * N; // number of items to scatter -+ const size_t max_bytes = range * sizeof(T); // upper bound on offset -+ -+ RandomState rng; -+ -+ // Data to be scattered -+ auto bytes = AllocateAligned(max_bytes); -+ for (size_t i = 0; i < max_bytes; ++i) { -+ bytes[i] = static_cast(Random32(&rng) & 0xFF); -+ } -+ const auto data = Load(d, reinterpret_cast(bytes.get())); -+ -+ // Scatter into these regions, ensure vector results match scalar -+ auto expected = AllocateAligned(range); -+ auto actual = AllocateAligned(range); -+ -+ const Rebind d_offsets; -+ auto offsets = AllocateAligned(N); // or indices -+ -+ for (size_t rep = 0; rep < 100; ++rep) { -+ // Byte offsets -+ std::fill(expected.get(), expected.get() + range, T(0)); -+ std::fill(actual.get(), actual.get() + range, T(0)); -+ for (size_t i = 0; i < N; ++i) { -+ offsets[i] = -+ static_cast(Random32(&rng) % (max_bytes - sizeof(T))); -+ CopyBytes( -+ bytes.get() + i * sizeof(T), -+ reinterpret_cast(expected.get()) + offsets[i]); -+ } -+ const auto voffsets = Load(d_offsets, offsets.get()); -+ ScatterOffset(data, d, actual.get(), voffsets); -+ if (!BytesEqual(expected.get(), actual.get(), max_bytes)) { -+ Print(d, "Data", data); -+ Print(d_offsets, "Offsets", voffsets); -+ HWY_ASSERT(false); -+ } -+ -+ // Indices -+ std::fill(expected.get(), expected.get() + range, T(0)); -+ std::fill(actual.get(), actual.get() + range, T(0)); -+ for (size_t i = 0; i < N; ++i) { -+ offsets[i] = static_cast(Random32(&rng) % range); -+ CopyBytes(bytes.get() + i * sizeof(T), -+ &expected[offsets[i]]); -+ } -+ const auto vindices = Load(d_offsets, offsets.get()); -+ ScatterIndex(data, d, actual.get(), vindices); -+ if (!BytesEqual(expected.get(), actual.get(), max_bytes)) { -+ Print(d, "Data", data); -+ Print(d_offsets, "Indices", vindices); -+ HWY_ASSERT(false); -+ } -+ } -+ } -+}; -+ -+HWY_NOINLINE void TestAllScatter() { -+ // No u8,u16,i8,i16. -+ const ForPartialVectors test; -+ test(uint32_t()); -+ test(int32_t()); -+ -+#if HWY_CAP_INTEGER64 -+ test(uint64_t()); -+ test(int64_t()); -+#endif -+ -+ ForFloatTypes(test); -+} -+ - struct TestGather { - template - HWY_NOINLINE void operator()(T /*unused*/, D d) { -@@ -391,6 +476,7 @@ HWY_NOINLINE void TestAllCache() { - int test = 0; - Prefetch(&test); - FlushCacheline(&test); -+ Pause(); - } - - // NOLINTNEXTLINE(google-readability-namespace-comments) -diff -up chromium-92.0.4515.107/third_party/highway/src/hwy/tests/swizzle_test.cc.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/hwy/tests/swizzle_test.cc ---- chromium-92.0.4515.107/third_party/highway/src/hwy/tests/swizzle_test.cc.update-highway-0.12.2 2021-07-19 14:47:23.000000000 -0400 -+++ chromium-92.0.4515.107/third_party/highway/src/hwy/tests/swizzle_test.cc 2021-07-26 17:10:40.023319835 -0400 -@@ -223,6 +223,7 @@ struct TestTableLookupBytes { - HWY_NOINLINE void TestAllTableLookupBytes() { - ForIntegerTypes(ForPartialVectors()); - } -+ - struct TestTableLookupLanes { - #if HWY_TARGET == HWY_RVV - using Index = uint32_t; -@@ -242,12 +243,13 @@ struct TestTableLookupLanes { - if (N <= 8) { // Test all permutations - for (size_t i0 = 0; i0 < N; ++i0) { - idx[0] = static_cast(i0); -+ - for (size_t i1 = 0; i1 < N; ++i1) { -- idx[1] = static_cast(i1); -+ if (N >= 2) idx[1] = static_cast(i1); - for (size_t i2 = 0; i2 < N; ++i2) { -- idx[2] = static_cast(i2); -+ if (N >= 4) idx[2] = static_cast(i2); - for (size_t i3 = 0; i3 < N; ++i3) { -- idx[3] = static_cast(i3); -+ if (N >= 4) idx[3] = static_cast(i3); - - for (size_t i = 0; i < N; ++i) { - expected[i] = static_cast(idx[i] + 1); // == v[idx[i]] -@@ -286,7 +288,7 @@ struct TestTableLookupLanes { - }; - - HWY_NOINLINE void TestAllTableLookupLanes() { -- const ForFullVectors test; -+ const ForPartialVectors test; - test(uint32_t()); - test(int32_t()); - test(float()); -diff -up chromium-92.0.4515.107/third_party/highway/src/README.md.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/README.md ---- chromium-92.0.4515.107/third_party/highway/src/README.md.update-highway-0.12.2 2021-07-26 17:10:40.838332249 -0400 -+++ chromium-92.0.4515.107/third_party/highway/src/README.md 2021-07-26 17:15:00.832292309 -0400 -@@ -15,7 +15,7 @@ applying the same operation to 'lanes'. - ## Current status - - Supported targets: scalar, SSE4, AVX2, AVX-512, NEON (ARMv7 and v8), WASM SIMD. --A port to RVV is in progress. -+Ports to RVV and SVE/SVE2 are in progress. - - Version 0.11 is considered stable enough to use in other projects, and is - expected to remain backwards compatible unless serious issues are discovered -@@ -23,8 +23,11 @@ while implementing SVE/RVV targets. Afte - reach version 1.0. - - Continuous integration tests build with a recent version of Clang (running on --x86 and QEMU for ARM) and MSVC from VS2015 (running on x86). Also periodically --tested on x86 with Clang 7-11 and GCC 8, 9 and 10.2.1. -+x86 and QEMU for ARM) and MSVC from VS2015 (running on x86). -+ -+Before releases, we also test on x86 with Clang and GCC, and ARMv7/8 via -+GCC cross-compile and QEMU. See the -+[testing process](g3doc/release_testing_process.md) for details. - - The `contrib` directory contains SIMD-related utilities: an image class with - aligned rows, and a math library (16 functions already implemented, mostly -@@ -63,6 +66,8 @@ To test on all the attainable targets fo - default configuration skips baseline targets (e.g. scalar) that are superseded - by another baseline target. - -+Bazel is also supported for building, but it is not as widely used/tested. -+ - ## Quick start - - You can use the `benchmark` inside examples/ as a starting point. -diff -up chromium-92.0.4515.107/third_party/highway/src/run_tests.bat.update-highway-0.12.2 chromium-92.0.4515.107/third_party/highway/src/run_tests.bat ---- chromium-92.0.4515.107/third_party/highway/src/run_tests.bat.update-highway-0.12.2 2021-07-19 14:47:23.000000000 -0400 -+++ chromium-92.0.4515.107/third_party/highway/src/run_tests.bat 2021-07-26 17:14:47.466088723 -0400 -@@ -2,9 +2,9 @@ - REM Switch directory of this batch file - cd %~dp0 - --if not exist build mkdir build -+if not exist build_win mkdir build_win - --cd build -+cd build_win - cmake .. -G Ninja || goto error - ninja || goto error - ctest -j || goto error diff --git a/chromium-93-BluetoothLowEnergyScanFilter-include.patch b/chromium-93-BluetoothLowEnergyScanFilter-include.patch deleted file mode 100644 index aa1c033..0000000 --- a/chromium-93-BluetoothLowEnergyScanFilter-include.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 2f5514051210388bfcff605570d33f08cfa7bcaa Mon Sep 17 00:00:00 2001 -From: Jose Dapena Paz -Date: Wed, 21 Jul 2021 08:34:58 +0000 -Subject: [PATCH] IWYU: usage of unique_ptr requires including in bluetooth low energy scan filter. - -Fix build because of missing include: -../../device/bluetooth/bluetooth_low_energy_scan_filter.h:57:15: error: ‘unique_ptr’ in namespace ‘std’ does not name a template type - 57 | static std::unique_ptr Create( - | ^~~~~~~~~~ - -Bug: 819294 -Change-Id: I347953a083f1bcdf744fd86e1a73954c6f86b32e -Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3041155 -Reviewed-by: Reilly Grant -Commit-Queue: José Dapena Paz -Cr-Commit-Position: refs/heads/master@{#903819} ---- - -diff --git a/device/bluetooth/bluetooth_low_energy_scan_filter.h b/device/bluetooth/bluetooth_low_energy_scan_filter.h -index a0436c1..7ae606c 100644 ---- a/device/bluetooth/bluetooth_low_energy_scan_filter.h -+++ b/device/bluetooth/bluetooth_low_energy_scan_filter.h -@@ -7,6 +7,7 @@ - - #include - #include -+#include - #include - - #include "base/time/time.h" diff --git a/chromium-93-ClassProperty-include.patch b/chromium-93-ClassProperty-include.patch deleted file mode 100644 index b5f81b4..0000000 --- a/chromium-93-ClassProperty-include.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 8ae99ee447cf5f0160ea4ae978cdf37f5dcecd1e Mon Sep 17 00:00:00 2001 -From: Jose Dapena Paz -Date: Wed, 21 Jul 2021 08:36:20 +0000 -Subject: [PATCH] IWYU: missing memory include for unique_ptr usage in class_property.h - -Fix GCC build breakage because of missing inclide: -./../ui/base/class_property.h:120:58: error: ‘std::unique_ptr’ has not been declared - 120 | T* SetProperty(const ClassProperty* property, std::unique_ptr value); - | ^~~~~~~~~~ - -Bug: 819294 -Change-Id: I46b921876702b8d44674689bbb5acdc107db21e5 -Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3041030 -Reviewed-by: Peter Boström -Commit-Queue: José Dapena Paz -Cr-Commit-Position: refs/heads/master@{#903820} ---- - -diff --git a/ui/base/class_property.h b/ui/base/class_property.h -index f7b2f55..88b4938 100644 ---- a/ui/base/class_property.h -+++ b/ui/base/class_property.h -@@ -8,6 +8,7 @@ - #include - - #include -+#include - #include - #include - diff --git a/chromium-93-ContextSet-permissive.patch b/chromium-93-ContextSet-permissive.patch deleted file mode 100644 index 4606504..0000000 --- a/chromium-93-ContextSet-permissive.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 7108f83c8ad1bad4072e4f32da3db6d59cf51400 Mon Sep 17 00:00:00 2001 -From: Ivan Murashov -Date: Tue, 20 Jul 2021 13:16:44 +0300 -Subject: [PATCH] GCC: Remove double declaration of ContextSet - -After the CL -https://chromium-review.googlesource.com/c/angle/angle/+/2965780 -the build with GCC failed with error: -/third_party/angle/src/libANGLE/Display.h:325:37: error: declaration of -'typedef class std::__1::set egl::Display::ContextSet' -changes meaning of 'ContextSet' [-fpermissive] -/third_party/angle/src/libANGLE/Display.h:75:7: note: 'ContextSet' -declared here as 'using ContextSet = class std::__1::set' - -To fix the error the double declaration of ContextSet is removed. - -Bug: angleproject:5878, chromium:819294 -Change-Id: Id9e52061af53ea18dd5d13b960daaa67a14f61ca -Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/3038804 -Reviewed-by: Jamie Madill -Commit-Queue: Jamie Madill ---- - -diff --git a/third_party/angle/CONTRIBUTORS b/third_party/angle/CONTRIBUTORS -index 887ddc2..94b1b4d 100644 ---- a/third_party/angle/CONTRIBUTORS -+++ b/third_party/angle/CONTRIBUTORS -@@ -154,6 +154,7 @@ - - LG Electronics, Inc. - Jani Hautakangas -+ Ivan Murashov - - IBM Inc. - Junliang Yan -diff --git a/third_party/angle/src/libANGLE/Display.h b/third_party/angle/src/libANGLE/Display.h -index f33123b..f0c0910 100644 ---- a/third_party/angle/src/libANGLE/Display.h -+++ b/third_party/angle/src/libANGLE/Display.h -@@ -322,7 +322,6 @@ - - ConfigSet mConfigSet; - -- typedef std::set ContextSet; - ContextSet mContextSet; - - typedef std::set ImageSet; diff --git a/chromium-93-DevToolsEmbedderMessageDispatcher-include.patch b/chromium-93-DevToolsEmbedderMessageDispatcher-include.patch deleted file mode 100644 index dae81c0..0000000 --- a/chromium-93-DevToolsEmbedderMessageDispatcher-include.patch +++ /dev/null @@ -1,24 +0,0 @@ -From 409859ad9ba763a4267fb3457df7cd8eb0b7387b Mon Sep 17 00:00:00 2001 -From: Stephan Hartmann -Date: Sun, 25 Jul 2021 19:43:45 +0000 -Subject: [PATCH] IWYU: add vector for std::vector - ---- - chrome/browser/devtools/devtools_embedder_message_dispatcher.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/chrome/browser/devtools/devtools_embedder_message_dispatcher.h b/chrome/browser/devtools/devtools_embedder_message_dispatcher.h -index 12f8500..4007112 100644 ---- a/chrome/browser/devtools/devtools_embedder_message_dispatcher.h -+++ b/chrome/browser/devtools/devtools_embedder_message_dispatcher.h -@@ -8,6 +8,7 @@ - #include - #include - #include -+#include - - #include "base/callback.h" - #include "ui/gfx/geometry/insets.h" --- -2.31.1 - diff --git a/chromium-93-FormForest-constexpr.patch b/chromium-93-FormForest-constexpr.patch deleted file mode 100644 index 56f7492..0000000 --- a/chromium-93-FormForest-constexpr.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 802150d7be94e5317b257df545d55ce5b007ae65 Mon Sep 17 00:00:00 2001 -From: Jose Dapena Paz -Date: Tue, 20 Jul 2021 18:50:11 +0000 -Subject: [PATCH] libstdc++: do not use unique_ptr bool() operator in a constexpr in form_forest.h - -Fix build breakage in GCC, because of calling non constexpr functions from a -constexpr function. In this case, libstdc++ unique_ptr bool() operator is not -constexpr, so it cannot be used inside CompareByFrameToken. - -An example of build breakage caused by this: - ../../components/autofill/content/browser/form_forest.h:157:21: error: call to non-‘constexpr’ function ‘std::unique_ptr<_Tp, _Dp>::operator bool() const [with _Tp = autofill::internal::FormForest::FrameData; _Dp = std::default_delete]’ - 157 | return f && g ? f->frame_token < g->frame_token : f.get() < g.get(); - | ^ - -Bug: 957519 -Change-Id: I3c49559084fe58886a03520729873b7c4ac89bbf -Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3041050 -Commit-Queue: Dominic Battré -Reviewed-by: Dominic Battré -Cr-Commit-Position: refs/heads/master@{#903595} ---- - -diff --git a/components/autofill/content/browser/form_forest.h b/components/autofill/content/browser/form_forest.h -index c89a8eb..f414ab8 100644 ---- a/components/autofill/content/browser/form_forest.h -+++ b/components/autofill/content/browser/form_forest.h -@@ -152,16 +152,16 @@ - // used by FrameData sets. - struct CompareByFrameToken { - using is_transparent = void; -- constexpr bool operator()(const std::unique_ptr& f, -- const std::unique_ptr& g) const { -+ bool operator()(const std::unique_ptr& f, -+ const std::unique_ptr& g) const { - return f && g ? f->frame_token < g->frame_token : f.get() < g.get(); - } -- constexpr bool operator()(const std::unique_ptr& f, -- const LocalFrameToken& g) const { -+ bool operator()(const std::unique_ptr& f, -+ const LocalFrameToken& g) const { - return f ? f->frame_token < g : true; - } -- constexpr bool operator()(const LocalFrameToken& f, -- const std::unique_ptr& g) const { -+ bool operator()(const LocalFrameToken& f, -+ const std::unique_ptr& g) const { - return g ? f < g->frame_token : false; - } - }; diff --git a/chromium-93-HashPasswordManager-include.patch b/chromium-93-HashPasswordManager-include.patch deleted file mode 100644 index 7e9f386..0000000 --- a/chromium-93-HashPasswordManager-include.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 17d0e3dfcd0690df0e7b212fedcb95402f16935d Mon Sep 17 00:00:00 2001 -From: Jose Dapena Paz -Date: Fri, 23 Jul 2021 10:17:49 +0000 -Subject: [PATCH] IWYU: missing include for using std::vector in hash password manager. - -Fix build breakage: -../../components/password_manager/core/browser/hash_password_manager.h:44:8: error: ‘vector’ in namespace ‘std’ does not name a template type - 44 | std::vector RetrieveAllPasswordHashes(); - | ^~~~~~ - -Bug: 819294 -Change-Id: I8c8a4ec3972eedb87a312c5ec56adf4a21b1b2a2 -Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3041046 -Commit-Queue: Vasilii Sukhanov -Reviewed-by: Vasilii Sukhanov -Cr-Commit-Position: refs/heads/master@{#904696} ---- - -diff --git a/components/password_manager/core/browser/hash_password_manager.h b/components/password_manager/core/browser/hash_password_manager.h -index c762c5a..85e656ed 100644 ---- a/components/password_manager/core/browser/hash_password_manager.h -+++ b/components/password_manager/core/browser/hash_password_manager.h -@@ -6,6 +6,7 @@ - #define COMPONENTS_PASSWORD_MANAGER_CORE_BROWSER_HASH_PASSWORD_MANAGER_H_ - - #include -+#include - - #include "base/callback.h" - #include "base/callback_list.h" diff --git a/chromium-93-ScopedTestDialogAutoConfirm-include.patch b/chromium-93-ScopedTestDialogAutoConfirm-include.patch deleted file mode 100644 index 1ff0673..0000000 --- a/chromium-93-ScopedTestDialogAutoConfirm-include.patch +++ /dev/null @@ -1,24 +0,0 @@ -From 92fc089d50fc81b9903cd0573c95749e41081474 Mon Sep 17 00:00:00 2001 -From: Stephan Hartmann -Date: Sun, 25 Jul 2021 21:38:26 +0000 -Subject: [PATCH] IWYU: add cstring for std::strcpy - ---- - extensions/browser/extension_dialog_auto_confirm.cc | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/extensions/browser/extension_dialog_auto_confirm.cc b/extensions/browser/extension_dialog_auto_confirm.cc -index adb4ac3..be8b161 100644 ---- a/extensions/browser/extension_dialog_auto_confirm.cc -+++ b/extensions/browser/extension_dialog_auto_confirm.cc -@@ -4,6 +4,7 @@ - - #include "extensions/browser/extension_dialog_auto_confirm.h" - -+#include - #include - - #include "base/check.h" --- -2.31.1 - diff --git a/chromium-93-pdfium-include.patch b/chromium-93-pdfium-include.patch deleted file mode 100644 index 72f2abd..0000000 --- a/chromium-93-pdfium-include.patch +++ /dev/null @@ -1,117 +0,0 @@ -From 7a6289c5ace52cf88f0e19caa5f78b7c15d0e7a6 Mon Sep 17 00:00:00 2001 -From: Miklos Vajna -Date: Wed, 21 Jul 2021 17:42:30 +0000 -Subject: [PATCH] fxcodec, fxge: fix missing includes with libstdc++ - -These missing includes break the build with gcc/libstdc++, they were not -a problem in practice with clang/libc++. - -Change-Id: I40013f97ba7ab06f32aa59f87b04aec06a19478c -Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/83210 -Commit-Queue: Lei Zhang -Reviewed-by: Lei Zhang ---- - -diff --git a/third_party/pdfium/core/fxcodec/jpeg/jpegmodule.cpp b/third_party/pdfium/core/fxcodec/jpeg/jpegmodule.cpp -index cea0679..036f250 100644 ---- a/third_party/pdfium/core/fxcodec/jpeg/jpegmodule.cpp -+++ b/third_party/pdfium/core/fxcodec/jpeg/jpegmodule.cpp -@@ -7,6 +7,7 @@ - #include "core/fxcodec/jpeg/jpegmodule.h" - - #include -+#include - - #include - #include -diff --git a/third_party/pdfium/core/fxcodec/jpx/cjpx_decoder.cpp b/third_party/pdfium/core/fxcodec/jpx/cjpx_decoder.cpp -index c66985a..9c1122b 100644 ---- a/third_party/pdfium/core/fxcodec/jpx/cjpx_decoder.cpp -+++ b/third_party/pdfium/core/fxcodec/jpx/cjpx_decoder.cpp -@@ -6,6 +6,8 @@ - - #include "core/fxcodec/jpx/cjpx_decoder.h" - -+#include -+ - #include - #include - #include -diff --git a/third_party/pdfium/core/fxge/cfx_cliprgn.cpp b/third_party/pdfium/core/fxge/cfx_cliprgn.cpp -index 5369d52..d198852 100644 ---- a/third_party/pdfium/core/fxge/cfx_cliprgn.cpp -+++ b/third_party/pdfium/core/fxge/cfx_cliprgn.cpp -@@ -6,6 +6,8 @@ - - #include "core/fxge/cfx_cliprgn.h" - -+#include -+ - #include - - #include "core/fxge/dib/cfx_dibitmap.h" -diff --git a/third_party/pdfium/core/fxge/dib/cfx_bitmapcomposer.cpp b/third_party/pdfium/core/fxge/dib/cfx_bitmapcomposer.cpp -index 6f9b420..0f1ffae 100644 ---- a/third_party/pdfium/core/fxge/dib/cfx_bitmapcomposer.cpp -+++ b/third_party/pdfium/core/fxge/dib/cfx_bitmapcomposer.cpp -@@ -6,6 +6,8 @@ - - #include "core/fxge/dib/cfx_bitmapcomposer.h" - -+#include -+ - #include "core/fxge/cfx_cliprgn.h" - #include "core/fxge/dib/cfx_dibitmap.h" - -diff --git a/third_party/pdfium/core/fxge/dib/cfx_bitmapstorer.cpp b/third_party/pdfium/core/fxge/dib/cfx_bitmapstorer.cpp -index f57c00e..45a0a18 100644 ---- a/third_party/pdfium/core/fxge/dib/cfx_bitmapstorer.cpp -+++ b/third_party/pdfium/core/fxge/dib/cfx_bitmapstorer.cpp -@@ -6,6 +6,8 @@ - - #include "core/fxge/dib/cfx_bitmapstorer.h" - -+#include -+ - #include - - #include "core/fxge/dib/cfx_dibitmap.h" -diff --git a/third_party/pdfium/core/fxge/dib/cfx_dibbase.cpp b/third_party/pdfium/core/fxge/dib/cfx_dibbase.cpp -index 4ec0ddb..a1de2fb 100644 ---- a/third_party/pdfium/core/fxge/dib/cfx_dibbase.cpp -+++ b/third_party/pdfium/core/fxge/dib/cfx_dibbase.cpp -@@ -6,6 +6,8 @@ - - #include "core/fxge/dib/cfx_dibbase.h" - -+#include -+ - #include - #include - #include -diff --git a/third_party/pdfium/core/fxge/dib/cfx_dibitmap.cpp b/third_party/pdfium/core/fxge/dib/cfx_dibitmap.cpp -index d7ccf6c..94e8acc 100644 ---- a/third_party/pdfium/core/fxge/dib/cfx_dibitmap.cpp -+++ b/third_party/pdfium/core/fxge/dib/cfx_dibitmap.cpp -@@ -6,6 +6,8 @@ - - #include "core/fxge/dib/cfx_dibitmap.h" - -+#include -+ - #include - #include - #include -diff --git a/third_party/pdfium/core/fxge/dib/cfx_scanlinecompositor.cpp b/third_party/pdfium/core/fxge/dib/cfx_scanlinecompositor.cpp -index e8362d7..c04c6dc 100644 ---- a/third_party/pdfium/core/fxge/dib/cfx_scanlinecompositor.cpp -+++ b/third_party/pdfium/core/fxge/dib/cfx_scanlinecompositor.cpp -@@ -6,6 +6,8 @@ - - #include "core/fxge/dib/cfx_scanlinecompositor.h" - -+#include -+ - #include - - #include "core/fxge/dib/fx_dib.h" diff --git a/chromium-93.0.4577.63-remoting-nodestructor-fix.patch b/chromium-93.0.4577.63-remoting-nodestructor-fix.patch deleted file mode 100644 index 6f2e419..0000000 --- a/chromium-93.0.4577.63-remoting-nodestructor-fix.patch +++ /dev/null @@ -1,21 +0,0 @@ -diff -up chromium-93.0.4577.63/remoting/host/remote_open_url_client.cc.remoting-nodestructor-fix chromium-93.0.4577.63/remoting/host/remote_open_url_client.cc ---- chromium-93.0.4577.63/remoting/host/remote_open_url_client.cc.remoting-nodestructor-fix 2021-09-03 17:30:44.162605313 +0000 -+++ chromium-93.0.4577.63/remoting/host/remote_open_url_client.cc 2021-09-03 17:31:01.017986003 +0000 -@@ -56,7 +56,7 @@ void ShowMessageDialog(const std::string - } - - bool IsBrowserValid(const std::string& browser) { -- static const base::NoDestructor> invalid_browsers( -+ static const base::NoDestructor> invalid_browsers{ - { - // This is the chromoting forwarder itself. - "crd-url-forwarder.desktop", -@@ -64,7 +64,7 @@ bool IsBrowserValid(const std::string& b - // XFCE's forwarder. May potentially launch the chromoting forwarder - // recursively. - "xfce4-web-browser.desktop", -- }); -+ }}; - if (browser.empty()) { - return false; - } diff --git a/chromium-93.0.4577.82-harfbuzz3.patch b/chromium-93.0.4577.82-harfbuzz3.patch deleted file mode 100644 index 1ed23fe..0000000 --- a/chromium-93.0.4577.82-harfbuzz3.patch +++ /dev/null @@ -1,97 +0,0 @@ -diff -up chromium-93.0.4577.82/components/paint_preview/common/subset_font.cc.hbfix chromium-93.0.4577.82/components/paint_preview/common/subset_font.cc ---- chromium-93.0.4577.82/components/paint_preview/common/subset_font.cc.hbfix 2021-09-20 11:37:58.633517817 -0400 -+++ chromium-93.0.4577.82/components/paint_preview/common/subset_font.cc 2021-09-20 11:40:12.111235467 -0400 -@@ -72,9 +72,11 @@ sk_sp SubsetFont(SkTypeface* typ - hb_set_t* glyphs = - hb_subset_input_glyph_set(input.get()); // Owned by |input|. - usage.ForEach(base::BindRepeating(&AddGlyphs, base::Unretained(glyphs))); -- hb_subset_input_set_retain_gids(input.get(), true); -+ hb_subset_input_set_flags(input.get(), HB_SUBSET_FLAGS_RETAIN_GIDS); - -- HbScoped subset_face(hb_subset(face.get(), input.get())); -+ HbScoped subset_face(hb_subset_or_fail(face.get(), input.get())); -+ if (!subset_face) -+ return nullptr; - HbScoped subset_blob(hb_face_reference_blob(subset_face.get())); - if (!subset_blob) - return nullptr; -diff -up chromium-93.0.4577.82/third_party/skia/gn/skia.gni.hbfix chromium-93.0.4577.82/third_party/skia/gn/skia.gni ---- chromium-93.0.4577.82/third_party/skia/gn/skia.gni.hbfix 2021-09-20 11:41:20.078600944 -0400 -+++ chromium-93.0.4577.82/third_party/skia/gn/skia.gni 2021-09-20 11:42:29.851976086 -0400 -@@ -33,8 +33,6 @@ declare_args() { - skia_include_multiframe_procs = false - skia_lex = false - skia_libgifcodec_path = "third_party/externals/libgifcodec" -- skia_pdf_subset_harfbuzz = -- false # TODO: set skia_pdf_subset_harfbuzz to skia_use_harfbuzz. - skia_qt_path = getenv("QT_PATH") - skia_skqp_global_error_tolerance = 0 - skia_tools_require_resources = false -@@ -99,6 +97,10 @@ declare_args() { - } - - declare_args() { -+ skia_pdf_subset_harfbuzz = skia_use_harfbuzz -+} -+ -+declare_args() { - skia_compile_sksl_tests = skia_compile_processors - skia_enable_fontmgr_android = skia_use_expat && skia_use_freetype - skia_enable_fontmgr_custom_directory = skia_use_freetype && !is_fuchsia -diff -up chromium-93.0.4577.82/third_party/skia/src/pdf/SkPDFSubsetFont.cpp.hbfix chromium-93.0.4577.82/third_party/skia/src/pdf/SkPDFSubsetFont.cpp ---- chromium-93.0.4577.82/third_party/skia/src/pdf/SkPDFSubsetFont.cpp.hbfix 2021-09-20 13:34:57.861369449 -0400 -+++ chromium-93.0.4577.82/third_party/skia/src/pdf/SkPDFSubsetFont.cpp 2021-09-20 13:38:28.063504311 -0400 -@@ -49,6 +49,37 @@ static sk_sp to_data(HBBlob blob - blob.release()); - } - -+template using void_t = void; -+template -+struct SkPDFHarfBuzzSubset { -+ // This is the HarfBuzz 3.0 interface. -+ // hb_subset_flags_t does not exist in 2.0. It isn't dependent on T, so inline the value of -+ // HB_SUBSET_FLAGS_RETAIN_GIDS until 2.0 is no longer supported. -+ static HBFace Make(T input, hb_face_t* face) { -+ // TODO: When possible, check if a font is 'tricky' with FT_IS_TRICKY. -+ // If it isn't known if a font is 'tricky', retain the hints. -+ hb_subset_input_set_flags(input, 2/*HB_SUBSET_FLAGS_RETAIN_GIDS*/); -+ return HBFace(hb_subset_or_fail(face, input)); -+ } -+}; -+template -+struct SkPDFHarfBuzzSubset(), std::declval())), -+ decltype(hb_subset_input_set_drop_hints(std::declval(), std::declval())), -+ decltype(hb_subset(std::declval(), std::declval())) -+ >> -+{ -+ // This is the HarfBuzz 2.0 (non-public) interface, used if it exists. -+ // This code should be removed as soon as all users are migrated to the newer API. -+ static HBFace Make(T input, hb_face_t* face) { -+ hb_subset_input_set_retain_gids(input, true); -+ // TODO: When possible, check if a font is 'tricky' with FT_IS_TRICKY. -+ // If it isn't known if a font is 'tricky', retain the hints. -+ hb_subset_input_set_drop_hints(input, false); -+ return HBFace(hb_subset(face, input)); -+ } -+}; -+ - static sk_sp subset_harfbuzz(sk_sp fontData, - const SkPDFGlyphUse& glyphUsage, - int ttcIndex) { -@@ -71,11 +102,10 @@ static sk_sp subset_harfbuzz(sk_ - hb_set_t* glyphs = hb_subset_input_glyph_set(input.get()); - glyphUsage.getSetValues([&glyphs](unsigned gid) { hb_set_add(glyphs, gid);}); - -- hb_subset_input_set_retain_gids(input.get(), true); -- // TODO: When possible, check if a font is 'tricky' with FT_IS_TRICKY. -- // If it isn't known if a font is 'tricky', retain the hints. -- hb_subset_input_set_drop_hints(input.get(), false); -- HBFace subset(hb_subset(face.get(), input.get())); -+ HBFace subset = SkPDFHarfBuzzSubset::Make(input.get(), face.get()); -+ if (!subset) { -+ return nullptr; -+ } - HBBlob result(hb_face_reference_blob(subset.get())); - return to_data(std::move(result)); - } diff --git a/chromium-94-ConversionStorageSql-lambda.patch b/chromium-94-ConversionStorageSql-lambda.patch new file mode 100644 index 0000000..6746fec --- /dev/null +++ b/chromium-94-ConversionStorageSql-lambda.patch @@ -0,0 +1,31 @@ +From 45bea088d3771c7ff9f77173e451422452c031b3 Mon Sep 17 00:00:00 2001 +From: Stephan Hartmann +Date: Mon, 02 Aug 2021 16:57:05 +0000 +Subject: [PATCH] GCC: drop WARN_USED_RESULT in lambda in ConversionStorageSql + +GCC 9.3 only allows GNU attributes between [] and () in lambda +expressions. See https://gcc.gnu.org/PR90333 for details. However, +clang only allows attributes after () only. Seems not strictly +necessary to enforce the attribute here. + +Bug: 819294 +Change-Id: I342deb25239837dea0f6f5e7709b1467789e342b +--- + +diff --git a/content/browser/conversions/conversion_storage_sql.cc b/content/browser/conversions/conversion_storage_sql.cc +index 84bc897..b6fc4e9 100644 +--- a/content/browser/conversions/conversion_storage_sql.cc ++++ b/content/browser/conversions/conversion_storage_sql.cc +@@ -688,9 +688,11 @@ + bool ConversionStorageSql::DeleteExpiredImpressions() { + const int kMaxDeletesPerBatch = 100; + ++ // GCC accepts attribute between [] and () only ++ // clang accepts attribute after () only + auto delete_impressions_from_paged_select = + [this](sql::Statement& statement) +- VALID_CONTEXT_REQUIRED(sequence_checker_) WARN_UNUSED_RESULT -> bool { ++ VALID_CONTEXT_REQUIRED(sequence_checker_) -> bool { + while (true) { + std::vector impression_ids; + while (statement.Step()) { diff --git a/chromium-94-CustomSpaces-include.patch b/chromium-94-CustomSpaces-include.patch new file mode 100644 index 0000000..0175775 --- /dev/null +++ b/chromium-94-CustomSpaces-include.patch @@ -0,0 +1,24 @@ +From 4eeacdaa57b29a079fe09315eb22557c06aa522e Mon Sep 17 00:00:00 2001 +From: Stephan Hartmann +Date: Fri, 13 Aug 2021 12:57:42 +0000 +Subject: [PATCH] IWYU: add memory for std::unique_ptr in blink::CustomSpaces + +--- + .../blink/renderer/platform/heap/v8_wrapper/custom_spaces.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/third_party/blink/renderer/platform/heap/v8_wrapper/custom_spaces.h b/third_party/blink/renderer/platform/heap/v8_wrapper/custom_spaces.h +index df0465a..640cb33 100644 +--- a/third_party/blink/renderer/platform/heap/v8_wrapper/custom_spaces.h ++++ b/third_party/blink/renderer/platform/heap/v8_wrapper/custom_spaces.h +@@ -5,6 +5,7 @@ + #ifndef THIRD_PARTY_BLINK_RENDERER_PLATFORM_HEAP_V8_WRAPPER_CUSTOM_SPACES_H_ + #define THIRD_PARTY_BLINK_RENDERER_PLATFORM_HEAP_V8_WRAPPER_CUSTOM_SPACES_H_ + ++#include + #include + + #include "third_party/blink/renderer/platform/platform_export.h" +-- +2.31.1 + diff --git a/chromium-94.0.4606.54-gcc9-drop-rsp-clobber.patch b/chromium-94.0.4606.54-gcc9-drop-rsp-clobber.patch new file mode 100644 index 0000000..0ccd0b1 --- /dev/null +++ b/chromium-94.0.4606.54-gcc9-drop-rsp-clobber.patch @@ -0,0 +1,12 @@ +diff -up chromium-94.0.4606.54/third_party/tcmalloc/vendor/src/base/linux_syscall_support.h.gcc9 chromium-94.0.4606.54/third_party/tcmalloc/vendor/src/base/linux_syscall_support.h +--- chromium-94.0.4606.54/third_party/tcmalloc/vendor/src/base/linux_syscall_support.h.gcc9 2021-09-20 15:13:05.000000000 -0400 ++++ chromium-94.0.4606.54/third_party/tcmalloc/vendor/src/base/linux_syscall_support.h 2021-09-23 12:41:53.540357702 -0400 +@@ -1485,7 +1485,7 @@ struct kernel_stat { + "d"(LSS_SYSCALL_ARG(parent_tidptr)), + "r"(LSS_SYSCALL_ARG(newtls)), + "r"(LSS_SYSCALL_ARG(child_tidptr)) +- : "rsp", "memory", "r8", "r10", "r11", "rcx"); ++ : "memory", "r8", "r10", "r11", "rcx"); + } + LSS_RETURN(int, __res); + } diff --git a/chromium-94.0.4606.54-remoting-nodestructor-fix.patch b/chromium-94.0.4606.54-remoting-nodestructor-fix.patch new file mode 100644 index 0000000..ce64c55 --- /dev/null +++ b/chromium-94.0.4606.54-remoting-nodestructor-fix.patch @@ -0,0 +1,22 @@ +diff -up chromium-94.0.4606.54/remoting/host/remote_open_url_client.cc.remoting-nodestructor-fix chromium-94.0.4606.54/remoting/host/remote_open_url_client.cc +diff -up chromium-94.0.4606.54/remoting/host/remote_open_url_client_delegate_linux.cc.remoting-nodestructor-fix chromium-94.0.4606.54/remoting/host/remote_open_url_client_delegate_linux.cc +--- chromium-94.0.4606.54/remoting/host/remote_open_url_client_delegate_linux.cc.remoting-nodestructor-fix 2021-09-24 11:02:02.662082902 -0400 ++++ chromium-94.0.4606.54/remoting/host/remote_open_url_client_delegate_linux.cc 2021-09-24 11:06:05.207472179 -0400 +@@ -47,7 +47,7 @@ void ShowMessageDialog(const std::string + } + + bool IsBrowserValid(const std::string& browser) { +- static const base::NoDestructor> invalid_browsers( ++ static const base::NoDestructor> invalid_browsers{ + { + // This is the chromoting forwarder itself. + "crd-url-forwarder.desktop", +@@ -55,7 +55,7 @@ bool IsBrowserValid(const std::string& b + // XFCE's forwarder. May potentially launch the chromoting forwarder + // recursively. + "xfce4-web-browser.desktop", +- }); ++ }}; + if (browser.empty()) { + return false; + } diff --git a/chromium-94.0.4606.54-webrtc-BUILD.gn-fix-multiple-defines.patch b/chromium-94.0.4606.54-webrtc-BUILD.gn-fix-multiple-defines.patch new file mode 100644 index 0000000..04ed84e --- /dev/null +++ b/chromium-94.0.4606.54-webrtc-BUILD.gn-fix-multiple-defines.patch @@ -0,0 +1,16 @@ +diff -up chromium-94.0.4606.54/third_party/webrtc/BUILD.gn.fix-multiple-define chromium-94.0.4606.54/third_party/webrtc/BUILD.gn +--- chromium-94.0.4606.54/third_party/webrtc/BUILD.gn.fix-multiple-define 2021-09-24 13:22:48.067565858 -0400 ++++ chromium-94.0.4606.54/third_party/webrtc/BUILD.gn 2021-09-24 13:23:10.163664830 -0400 +@@ -119,10 +119,10 @@ config("common_inherited_config") { + ldflags = [] + + if (rtc_enable_symbol_export || is_component_build) { +- defines = [ "WEBRTC_ENABLE_SYMBOL_EXPORT" ] ++ defines += [ "WEBRTC_ENABLE_SYMBOL_EXPORT" ] + } + if (rtc_enable_objc_symbol_export) { +- defines = [ "WEBRTC_ENABLE_OBJC_SYMBOL_EXPORT" ] ++ defines += [ "WEBRTC_ENABLE_OBJC_SYMBOL_EXPORT" ] + } + + if (build_with_mozilla) { diff --git a/chromium-94.0.4606.61-remoting-extra-qualification.patch b/chromium-94.0.4606.61-remoting-extra-qualification.patch new file mode 100644 index 0000000..2b64739 --- /dev/null +++ b/chromium-94.0.4606.61-remoting-extra-qualification.patch @@ -0,0 +1,12 @@ +diff -up chromium-94.0.4606.61/remoting/host/mojom/remoting_mojom_traits.h.extra-qualification chromium-94.0.4606.61/remoting/host/mojom/remoting_mojom_traits.h +--- chromium-94.0.4606.61/remoting/host/mojom/remoting_mojom_traits.h.extra-qualification 2021-09-25 01:48:28.812426004 +0000 ++++ chromium-94.0.4606.61/remoting/host/mojom/remoting_mojom_traits.h 2021-09-25 01:48:59.072267580 +0000 +@@ -17,7 +17,7 @@ + namespace mojo { + + template <> +-class mojo::StructTraits { + public: + static const std::string& mime_type( diff --git a/clean_ffmpeg.sh b/clean_ffmpeg.sh index 518eb5e..002dae9 100755 --- a/clean_ffmpeg.sh +++ b/clean_ffmpeg.sh @@ -79,6 +79,7 @@ header_files=" libavcodec/x86/inline_asm.h \ libavcodec/codec_par.h \ libavcodec/dct.h \ libavcodec/dct32.h \ + libavcodec/defs.h \ libavcodec/error_resilience.h \ libavcodec/fdctdsp.h \ libavcodec/fft.h \ diff --git a/sources b/sources index ba5f4e1..ba39e3b 100644 --- a/sources +++ b/sources @@ -20,5 +20,5 @@ SHA256 (NotoSansTibetan-Regular.ttf) = 5a45f430812ef981952553ef6dd6fa04c7849e978 SHA256 (node-v12.22.6-linux-x64.tar.xz) = 80fc80cdb3d829ea4d752c2e52067a426f6c4fd629ecca5a858d268af8d5ec7e SHA256 (node-v12.22.6-linux-arm64.tar.xz) = 39b1ee686c78315c04593d2e216595d052ae3378d9e50a0a72d8f2dc95e69e58 SHA256 (xcb-proto-1.14.tar.xz) = 186a3ceb26f9b4a015f5a44dcc814c93033a5fc39684f36f1ecc79834416a605 -SHA256 (chromium-93.0.4577.82.tar.xz) = 5d66214858fcba11a8f733d7a6fab61ed10e13e7df4ed37e63b66a0370fb2853 -SHA256 (ungoogled-chromium-93.0.4577.82-1.tar.gz) = ac6ff40201f389ca43a07563f7fefc2b33f956f7867d7fa8ff65312019496640 +SHA256 (chromium-94.0.4606.61.tar.xz) = 6446db535c02c461c7e5c8d294a0300db03abba791f97f0c70bc52255aedb9bf +SHA256 (ungoogled-chromium-94.0.4606.61-1.tar.gz) = 1931ff2120f2615d05597dc9f2429f3a185dd499964b3ca83e7012f629426646 diff --git a/ungoogled-chromium.spec b/ungoogled-chromium.spec index 90fa4ba..1a8a56e 100644 --- a/ungoogled-chromium.spec +++ b/ungoogled-chromium.spec @@ -145,25 +145,23 @@ BuildRequires: libicu-devel >= 5.4 %global bundlelibdrm 1 %global bundlefontconfig 1 %else +# As of Chromium 94, it uses functions in harfbuzz 2.9.0+, which is only found in F36+. +%if 0%{?fedora} >= 36 %global bundleharfbuzz 0 +%else +%global bundleharfbuzz 1 +%endif %global bundleopus 1 %global bundlelibusbx 0 %global bundlelibwebp 0 %global bundlelibpng 0 %global bundlelibjpeg 0 -%global bundlefreetype 0 +# Needs FT_ClipBox which was implemented after 2.11.0. Should be able to set this back to 0 later. +%global bundlefreetype 1 %global bundlelibdrm 0 %global bundlefontconfig 0 %endif -# Needs at least harfbuzz 2.4.0 now. -# 2019-09-13 -%if 0%{?fedora} < 31 -%global bundleharfbuzz 1 -%else -%global bundleharfbuzz 0 -%endif - ### Google API keys (see http://www.chromium.org/developers/how-tos/api-keys) ### Note: These are for Fedora use ONLY. ### For your own distribution, please get your own set of keys. @@ -175,18 +173,18 @@ BuildRequires: libicu-devel >= 5.4 #Build with debugging symbols %global debug_pkg 0 -%global majorversion 93 +%global majorversion 94 %global revision 1 # Depot tools revision -%global depot_tools_revision a806594b95a39141fdbf1f359087a44ffb2deaaf +%global depot_tools_revision 699d70d878603ebb3861c8f4487c4b05f5d81643 %if %{freeworld} Name: ungoogled-chromium%{nsuffix} %else Name: ungoogled-chromium %endif -Version: %{majorversion}.0.4577.82 +Version: %{majorversion}.0.4606.61 Release: 1%{?dist}.%{revision} %if %{?freeworld} # chromium-freeworld @@ -216,7 +214,7 @@ Patch7: chromium-71.0.3578.98-widevine-r3.patch # Disable fontconfig cache magic that breaks remoting Patch8: chromium-91.0.4472.77-disable-fontconfig-cache-magic.patch # drop rsp clobber, which breaks gcc9 (thanks to Jeff Law) -Patch9: chromium-78.0.3904.70-gcc9-drop-rsp-clobber.patch +Patch9: chromium-94.0.4606.54-gcc9-drop-rsp-clobber.patch # Try to load widevine from other places Patch10: chromium-92.0.4515.107-widevine-other-locations.patch # Tell bootstrap.py to always use the version of Python we specify @@ -243,9 +241,6 @@ Patch57: chromium-93.0.4577.63-missing-cstring.patch Patch58: chromium-53-ffmpeg-no-deprecation-errors.patch # https://github.com/stha09/chromium-patches/blob/master/chromium-91-libyuv-aarch64.patch Patch60: chromium-91-libyuv-aarch64.patch -# Update third_party/highway to 0.12.2 -# this is needed for sane arm/aarch64 -Patch61: chromium-92.0.4515.107-update-highway-0.12.2.patch # https://github.com/stha09/chromium-patches/blob/master/chromium-90-ruy-include.patch Patch62: chromium-90-ruy-include.patch # Extra CXXFLAGS for aarch64 @@ -262,9 +257,6 @@ Patch66: chromium-84.0.4147.125-remoting-cstring.patch Patch67: chromium-84.0.4147.125-i686-fix_textrels.patch # Work around binutils bug in aarch64 (F33+) Patch68: chromium-84.0.4147.125-aarch64-clearkeycdm-binutils-workaround.patch -# Fix sandbox code to properly handle the new way that glibc handles fstat in Fedora 34+ -# Thanks to Kevin Kofler for the fix. -Patch75: chromium-90.0.4430.72-fstatfix.patch # Rawhide (f35) glibc defines SIGSTKSZ as a long instead of a constant Patch76: chromium-92.0.4515.107-rawhide-gcc-std-max-fix.patch # Do not download proprietary widevine module in the background (thanks Debian) @@ -272,27 +264,16 @@ Patch79: chromium-93.0.4577.63-widevine-no-download.patch # Fix crashes with components/cast_* # Thanks to Gentoo Patch80: chromium-92.0.4515.107-EnumTable-crash.patch -# https://github.com/stha09/chromium-patches/blob/master/chromium-93-BluetoothLowEnergyScanFilter-include.patch -Patch81: chromium-93-BluetoothLowEnergyScanFilter-include.patch -# https://github.com/stha09/chromium-patches/blob/master/chromium-93-ClassProperty-include.patch -Patch82: chromium-93-ClassProperty-include.patch + +# https://github.com/stha09/chromium-patches/blob/master/chromium-94-ConversionStorageSql-lambda.patch +Patch81: chromium-94-ConversionStorageSql-lambda.patch +# https://github.com/stha09/chromium-patches/blob/master/chromium-94-CustomSpaces-include.patch +Patch82: chromium-94-CustomSpaces-include.patch # Fixes for python3 Patch83: chromium-92.0.4515.107-py3-fixes.patch # Clean up clang-format for python3 # thanks to Jon Nettleton Patch86: chromium-93.0.4577.63-clang-format.patch -# https://github.com/stha09/chromium-patches/blob/master/chromium-93-ContextSet-permissive.patch -Patch87: chromium-93-ContextSet-permissive.patch -# https://github.com/stha09/chromium-patches/blob/master/chromium-93-DevToolsEmbedderMessageDispatcher-include.patch -Patch88: chromium-93-DevToolsEmbedderMessageDispatcher-include.patch -# https://github.com/stha09/chromium-patches/blob/master/chromium-93-FormForest-constexpr.patch -Patch89: chromium-93-FormForest-constexpr.patch -# https://github.com/stha09/chromium-patches/blob/master/chromium-93-HashPasswordManager-include.patch -Patch90: chromium-93-HashPasswordManager-include.patch -# https://github.com/stha09/chromium-patches/blob/master/chromium-93-pdfium-include.patch -Patch91: chromium-93-pdfium-include.patch -# https://github.com/stha09/chromium-patches/blob/master/chromium-93-ScopedTestDialogAutoConfirm-include.patch -Patch92: chromium-93-ScopedTestDialogAutoConfirm-include.patch # In file included from ../../components/cast_channel/enum_table.cc:5: # ../../components/cast_channel/enum_table.h:359:18: error: 'vector' in namespace 'std' does not name a template type # 359 | const std::vector data_; @@ -300,12 +281,13 @@ Patch92: chromium-93-ScopedTestDialogAutoConfirm-include.patch # ../../components/cast_channel/enum_table.h:18:1: note: 'std::vector' is defined in header ''; did you forget to '#include '? Patch93: chromium-93.0.4577.63-vector-fix.patch # Fix NoDestructor issue with gcc -Patch94: chromium-93.0.4577.63-remoting-nodestructor-fix.patch +Patch94: chromium-94.0.4606.54-remoting-nodestructor-fix.patch # include full UrlResponseHead header Patch95: chromium-93.0.4577.63-mojo-header-fix.patch -# Fix against HarfBuzz v3 -# Thanks to Jan Beich @ FreeBSD -Patch96: chromium-93.0.4577.82-harfbuzz3.patch +# Fix multiple defines issue in webrtc/BUILD.gn +Patch96: chromium-94.0.4606.54-webrtc-BUILD.gn-fix-multiple-defines.patch +# Fix extra qualification error +Patch97: chromium-94.0.4606.61-remoting-extra-qualification.patch # Use lstdc++ on EPEL7 only @@ -395,7 +377,7 @@ Source20: https://www.x.org/releases/individual/proto/xcb-proto-1.14.tar.xz Source22: %{name}.appdata.xml # ungoogled-chromium source -%global ungoogled_chromium_revision 93.0.4577.82-1 +%global ungoogled_chromium_revision 94.0.4606.61-1 Source300: https://github.com/Eloston/ungoogled-chromium/archive/%{ungoogled_chromium_revision}/ungoogled-chromium-%{ungoogled_chromium_revision}.tar.gz # We can assume gcc and binutils. @@ -539,9 +521,6 @@ BuildRequires: pkgconfig(gtk+-3.0) %else BuildRequires: pkgconfig(gtk+-2.0) %endif -%if 0%{?fedora} -#BuildRequires: %{chromium_pybin} -%endif %if ! %{build_with_python3} %if 0%{?fedora} >= 32 BuildRequires: python2.7 @@ -780,7 +759,6 @@ ln -s depot_tools-%{depot_tools_revision} ../depot_tools %patch57 -p1 -b .missing-cstring %patch58 -p1 -b .ffmpeg-deprecations %patch60 -p1 -b .libyuv-aarch64 -%patch61 -p1 -b .update-highway-0.12.2 %patch62 -p1 -b .ruy-include %patch63 -p1 -b .aarch64-cxxflags-addition %patch64 -p1 -b .java-only-allowed @@ -788,28 +766,20 @@ ln -s depot_tools-%{depot_tools_revision} ../depot_tools %patch66 -p1 -b .remoting-cstring %patch67 -p1 -b .i686-textrels %patch68 -p1 -b .aarch64-clearkeycdm-binutils-workaround -%patch75 -p1 -b .fstatfix %if 0%{?fedora} >= 35 %patch76 -p1 -b .sigstkszfix %endif %patch79 -p1 -b .widevine-no-download %patch80 -p1 -b .EnumTable-crash -%patch81 -p1 -b .BluetoothLowEnergyScanFilter-include -%patch82 -p1 -b .ClassProperty-include +%patch81 -p1 -b .ConversionStorageSql-lambda-include +%patch82 -p1 -b .CustomSpaces-include %patch83 -p1 -b .py3fixes %patch86 -p1 -b .clang-format-py3 -%patch87 -p1 -b .ContextSet-permissive -%patch88 -p1 -b .DevToolsEmbedderMessageDispatcher-include -%patch89 -p1 -b .FormForest-constexpr -%patch90 -p1 -b .HashPasswordManager-include -%patch91 -p1 -b .pdfium-include -%patch92 -p1 -b .ScopedTestDialogAutoConfirm-include %patch93 -p1 -b .vector-fix %patch94 -p1 -b .remoting-nodestructor-fix %patch95 -p1 -b .mojo-header-fix -%if 0%{?fedora} >= 36 -%patch96 -p1 -b .hbfix -%endif +%patch96 -p1 -b .webrtc-BUILD.gn-fix-multiple-defines +%patch97 -p1 -b .remoting-extra-qualification # EPEL specific patches @@ -939,7 +909,7 @@ UNGOOGLED_CHROMIUM_GN_DEFINES+=' is_debug=false is_official_build=false is_unsaf UNGOOGLED_CHROMIUM_GN_DEFINES+=' system_libdir="lib64"' %endif UNGOOGLED_CHROMIUM_GN_DEFINES+=' google_api_key="%{api_key}" google_default_client_id="%{default_client_id}" google_default_client_secret="%{default_client_secret}"' -UNGOOGLED_CHROMIUM_GN_DEFINES+=' is_clang=false use_sysroot=false fieldtrial_testing_like_official_build=true use_lld=false rtc_enable_symbol_export=true' +UNGOOGLED_CHROMIUM_GN_DEFINES+=' is_clang=false use_sysroot=false disable_fieldtrial_testing_config=true use_lld=false rtc_enable_symbol_export=true' UNGOOGLED_CHROMIUM_GN_DEFINES+=' use_gold=false' %if %{freeworld} @@ -1082,6 +1052,7 @@ build/linux/unbundle/remove_bundled_libraries.py \ 'third_party/devtools-frontend/src/front_end/third_party/marked' \ 'third_party/devtools-frontend/src/front_end/third_party/puppeteer' \ 'third_party/devtools-frontend/src/front_end/third_party/wasmparser' \ + 'third_party/devtools-frontend/src/test/unittests/front_end/third_party/i18n' \ 'third_party/dom_distiller_js' \ 'third_party/eigen3' \ 'third_party/emoji-segmenter' \ @@ -1399,7 +1370,11 @@ tar xf %{SOURCE20} %endif # export PYTHONPATH="../../third_party/pyjson5/src:../../third_party/catapult/third_party/google-endpoints:../../xcb-proto-1.14" +%if 0%{?rhel} == 8 +export PYTHONPATH="../../third_party/protobuf/third_party/six:../../third_party/pyjson5/src:../../xcb-proto-1.14:../../third_party/catapult/third_party/html5lib-1.1" +%else export PYTHONPATH="../../third_party/pyjson5/src:../../xcb-proto-1.14:../../third_party/catapult/third_party/html5lib-1.1" +%endif echo # Now do the full browser @@ -1457,7 +1432,7 @@ rm -rf %{buildroot} %endif cp -a chrome %{buildroot}%{chromium_path}/%{chromium_browser_channel} cp -a chrome_sandbox %{buildroot}%{chromium_path}/chrome-sandbox - cp -a crashpad_handler %{buildroot}%{chromium_path}/crashpad_handler + cp -a chrome_crashpad_handler %{buildroot}%{chromium_path}/chrome_crashpad_handler cp -a ../../chrome/app/resources/manpage.1.in %{buildroot}%{_mandir}/man1/%{chromium_browser_channel}.1 sed -i "s|@@PACKAGE@@|%{chromium_browser_channel}|g" %{buildroot}%{_mandir}/man1/%{chromium_browser_channel}.1 sed -i "s|@@MENUNAME@@|%{chromium_menu_name}|g" %{buildroot}%{_mandir}/man1/%{chromium_browser_channel}.1 @@ -1548,7 +1523,7 @@ fi %dir %{chromium_path} %{chromium_path}/*.bin %{chromium_path}/chrome_*.pak -%{chromium_path}/crashpad_handler +%{chromium_path}/chrome_crashpad_handler %{chromium_path}/resources.pak %{chromium_path}/icudtl.dat %{chromium_path}/%{chromium_browser_channel} @@ -1640,6 +1615,9 @@ fi %endif %changelog +* Wed Sep 28 2021 wchen342 - 94.0.4606.61-1 +- update Chromium to 94.0.4606.61 + * Wed Sep 22 2021 wchen342 - 93.0.4577.82-1 - update Chromium to 93.0.4577.82