diff --git a/runtime/docs/pragmas.md b/runtime/docs/pragmas.md
index 4c5f287c5cdf..ee33555c03e9 100644
--- a/runtime/docs/pragmas.md
+++ b/runtime/docs/pragmas.md
@@ -38,6 +38,8 @@ These pragmas can cause unsound behavior if used incorrectly and therefore are o
 | `vm:exact-result-type` | [Declaring an exact result type of a method](compiler/pragmas_recognized_by_compiler.md#providing-an-exact-result-type) |
 | `vm:recognized` | [Marking this as a recognized method](compiler/pragmas_recognized_by_compiler.md#marking-recognized-methods) |
 | `vm:idempotent` | Method marked with this pragma can be repeated or restarted multiple times without change to its effect. Loading, storing of memory values are examples of this, while reads and writes from file are examples of non-idempotent methods. At present, use of this pragma is limited to driving inlining of force-optimized functions. |
+| `vm:cachable-idempotent` | Functions marked with this pragma have their return value cached at the call site. Not supported on ia32. The calling function must be marked with the pragma `vm:force-optimize`. |
+| `vm:force-optimize` | Functions marked with this pragma are compiled with the optimizing pipeline and must not deoptimize. |
 
 ## Pragmas ignored in user code
 
diff --git a/runtime/tests/vm/dart/cachable_idempotent_test.dart b/runtime/tests/vm/dart/cachable_idempotent_test.dart
new file mode 100644
index 000000000000..74b076a8d8f4
--- /dev/null
+++ b/runtime/tests/vm/dart/cachable_idempotent_test.dart
@@ -0,0 +1,236 @@
+// Copyright (c) 2023, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'dart:ffi';
+
+import 'package:expect/expect.dart';
+
+void main() {
+  testMultipleIncrement();
+  reset();
+  testMultipleCallSites();
+  reset();
+  testManyArguments();
+  reset();
+  testNonIntArguments();
+  reset();
+  testLargeInt();
+  reset();
+  testIntArguments();
+  reset();
+  testDoubleArguments();
+  print('done');
+}
+
+@pragma('vm:force-optimize')
+void testMultipleIncrement() {
+  int result = 0;
+  final counter = makeCounter(100000);
+  while (counter()) {
+    // We compile this call as a cacheable call,
+    // which will lead to the counter no longer being incremented.
+    // Make sure to return the value, so we can see that the boxing and
+    // unboxing works as expected.
+    result = cachedIncrement(/*must be const*/ 3);
+  }
+  // Since this call site is force optimized, we should never recompile and thus
+  // we only ever increment the global counter once.
+  Expect.equals(3, result);
+}
+
+/// Increments a global counter, except that the call sites are being cached.
+///
+/// Arguments passed to this function must be const.
+/// Call sites should be rewritten to cache using the pool.
+@pragma('vm:never-inline')
+@pragma('vm:cachable-idempotent')
+int cachedIncrement(int amount) {
+  return _globalCounter += amount;
+}
+
+int _globalCounter = 0;
+
+void reset() {
+  print('reset');
+  _globalCounter = 0;
+}
+
+/// Helper for writing loops in vm:force-optimize functions without instance calls.
+///
+/// A for loop uses the `operator+` on int.
+bool Function() makeCounter(int count) {
+  return () => count-- >= 0;
+}
+
+@pragma('vm:force-optimize')
+void testMultipleCallSites() {
+  int result = 0;
+  final counter = makeCounter(10);
+  result = cachedIncrement(1);
+  while (counter()) {
+    result = cachedIncrement(10);
+    result = cachedIncrement(10);
+  }
+  result = cachedIncrement(100);
+  // All call sites are cached individually.
+  // Even if the arguments are identical.
+  Expect.equals(121, result);
+}
+
+@pragma('vm:force-optimize')
+void testManyArguments() {
+  final result = manyArguments(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+  Expect.equals(55, result);
+}
+
+@pragma('vm:never-inline')
+@pragma('vm:cachable-idempotent')
+int manyArguments(int i1, int i2, int i3, int i4, int i5, int i6, int i7,
+    int i8, int i9, int i10) {
+  return i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + i10;
+}
+
+@pragma('vm:force-optimize')
+void testNonIntArguments() {
+  final result = lotsOfConstArguments(
+    "foo",
+    3.0,
+    3,
+    const _MyClass(_MyClass(42)),
+  );
+
+  Expect.equals(37, result);
+}
+
+@pragma('vm:never-inline')
+@pragma('vm:cachable-idempotent')
+int lotsOfConstArguments(String s, double d, int i, _MyClass m) {
+  return [s, d, i, m].toString().length;
+}
+
+final class _MyClass {
+  final Object i;
+  const _MyClass(this.i);
+
+  @override
+  String toString() => '_MyClass($i)';
+}
+
+@pragma('vm:force-optimize')
+void testLargeInt() {
+  final counter = makeCounter(10);
+  while (counter()) {
+    if (is64bitsArch()) {
+      final result1 = cachedIncrement(0x7FFFFFFFFFFFFFFF);
+      Expect.equals(0x7FFFFFFFFFFFFFFF, result1);
+      _globalCounter = 0;
+      final result2 = cachedIncrement(0x8000000000000000);
+      Expect.equals(0x8000000000000000, result2);
+      _globalCounter = 0;
+      final result3 = cachedIncrement(0xFFFFFFFFFFFFFFFF);
+      Expect.equals(0xFFFFFFFFFFFFFFFF, result3);
+    } else {
+      final result1 = cachedIncrement(0x7FFFFFFF);
+      Expect.equals(0x7FFFFFFF, result1);
+      _globalCounter = 0;
+      final result2 = cachedIncrement(0x80000000);
+      Expect.equals(0x80000000, result2);
+      _globalCounter = 0;
+      final result3 = cachedIncrement(0xFFFFFFFF);
+      Expect.equals(0xFFFFFFFF, result3);
+    }
+  }
+}
+
+bool is64bitsArch() => sizeOf<IntPtr>() == 8;
+
+@pragma('vm:force-optimize')
+void testIntArguments() {
+  final result = lotsOfIntArguments(
+    1,
+    2,
+    3,
+    4,
+    5,
+    6,
+    7,
+    8,
+  );
+  Expect.equals(36, result);
+
+  // Do a second call with different values to prevent the argument values
+  // propagating to the function body in TFA.
+  final result2 = lotsOfIntArguments(
+    101,
+    102,
+    103,
+    104,
+    105,
+    106,
+    107,
+    108,
+  );
+  Expect.equals(836, result2);
+}
+
+@pragma('vm:never-inline')
+@pragma('vm:cachable-idempotent')
+int lotsOfIntArguments(
+  int d1,
+  int d2,
+  int d3,
+  int d4,
+  int d5,
+  int d6,
+  int d7,
+  int d8,
+) {
+  print([d1, d2, d3, d4, d5, d6, d7, d8]);
+  return (d1 + d2 + d3 + d4 + d5 + d6 + d7 + d8).floor();
+}
+
+@pragma('vm:force-optimize')
+void testDoubleArguments() {
+  final result = lotsOfDoubleArguments(
+    1.0,
+    2.0,
+    3.0,
+    4.0,
+    5.0,
+    6.0,
+    7.0,
+    8.0,
+  );
+  Expect.equals(36, result);
+
+  // Do a second call with different values to prevent the argument values
+  // propagating to the function body in TFA.
+ final result2 = lotsOfDoubleArguments( + 101.0, + 102.0, + 103.0, + 104.0, + 105.0, + 106.0, + 107.0, + 108.0, + ); + Expect.equals(836, result2); +} + +@pragma('vm:never-inline') +@pragma('vm:cachable-idempotent') +int lotsOfDoubleArguments( + double d1, + double d2, + double d3, + double d4, + double d5, + double d6, + double d7, + double d8, +) { + print([d1, d2, d3, d4, d5, d6, d7, d8]); + return (d1 + d2 + d3 + d4 + d5 + d6 + d7 + d8).floor(); +} diff --git a/runtime/tests/vm/vm.status b/runtime/tests/vm/vm.status index f28a0eeeec4f..4726799df104 100644 --- a/runtime/tests/vm/vm.status +++ b/runtime/tests/vm/vm.status @@ -32,6 +32,7 @@ dart/snapshot_version_test: Skip # This test is a Dart1 test (script snapshot) dart/stack_overflow_shared_test: Pass, Slow # Uses --shared-slow-path-triggers-gc flag. [ $arch == ia32 ] +dart/cachable_idempotent_test: Skip # CachableIdempotent calls are not supported in ia32 because it has no object pool. dart/disassemble_aot_test: SkipByDesign # IA32 does not support AOT. dart/regress32597_2_test: Pass, Slow # Uses --optimization-counter-threshold=10 without a kernel service snapshot. dart/regress38467_test: Pass, Slow # Uses --optimization-counter-threshold=10 without a kernel service snapshot. diff --git a/runtime/vm/app_snapshot.cc b/runtime/vm/app_snapshot.cc index ab0636a5b775..8ec66f2b8fa5 100644 --- a/runtime/vm/app_snapshot.cc +++ b/runtime/vm/app_snapshot.cc @@ -3284,6 +3284,9 @@ class ObjectPoolDeserializationCluster : public DeserializationCluster { static_cast(switchable_call_miss_entry_point); continue; #endif // defined(DART_PRECOMPILED_RUNTIME) + case ObjectPool::SnapshotBehavior::kSetToZero: + entry.raw_value_ = 0; + continue; default: FATAL("Unexpected snapshot behavior: %d\n", snapshot_behavior); } diff --git a/runtime/vm/compiler/assembler/assembler_arm.cc b/runtime/vm/compiler/assembler/assembler_arm.cc index ac3c0401061f..ae02ce8fbcba 100644 --- a/runtime/vm/compiler/assembler/assembler_arm.cc +++ b/runtime/vm/compiler/assembler/assembler_arm.cc @@ -1569,6 +1569,33 @@ void Assembler::LoadWordFromPoolIndex(Register rd, } } +void Assembler::StoreWordToPoolIndex(Register value, + intptr_t index, + Register pp, + Condition cond) { + ASSERT((pp != PP) || constant_pool_allowed()); + ASSERT(value != pp); + // PP is tagged on ARM. + const int32_t offset = + target::ObjectPool::element_offset(index) - kHeapObjectTag; + int32_t offset_mask = 0; + if (Address::CanHoldLoadOffset(kFourBytes, offset, &offset_mask)) { + str(value, Address(pp, offset), cond); + } else { + int32_t offset_hi = offset & ~offset_mask; // signed + uint32_t offset_lo = offset & offset_mask; // unsigned + // Inline a simplified version of AddImmediate(rd, pp, offset_hi). + Operand o; + if (Operand::CanHold(offset_hi, &o)) { + add(TMP, pp, o, cond); + } else { + LoadImmediate(TMP, offset_hi, cond); + add(TMP, pp, Operand(TMP), cond); + } + str(value, Address(TMP, offset_lo), cond); + } +} + void Assembler::CheckCodePointer() { #ifdef DEBUG if (!FLAG_check_code_pointer) { diff --git a/runtime/vm/compiler/assembler/assembler_arm.h b/runtime/vm/compiler/assembler/assembler_arm.h index a3827eabc019..85858660e1da 100644 --- a/runtime/vm/compiler/assembler/assembler_arm.h +++ b/runtime/vm/compiler/assembler/assembler_arm.h @@ -983,6 +983,13 @@ class Assembler : public AssemblerBase { intptr_t index, Register pp = PP, Condition cond = AL); + // Store word to pool at the given offset. + // + // Note: clobbers TMP. 
+ void StoreWordToPoolIndex(Register value, + intptr_t index, + Register pp = PP, + Condition cond = AL); void LoadObject(Register rd, const Object& object, Condition cond = AL); void LoadUniqueObject( diff --git a/runtime/vm/compiler/assembler/assembler_arm64.cc b/runtime/vm/compiler/assembler/assembler_arm64.cc index 074bcbadde96..31050cd9f6ad 100644 --- a/runtime/vm/compiler/assembler/assembler_arm64.cc +++ b/runtime/vm/compiler/assembler/assembler_arm64.cc @@ -434,6 +434,34 @@ void Assembler::LoadWordFromPoolIndex(Register dst, } } +void Assembler::StoreWordToPoolIndex(Register src, + intptr_t index, + Register pp) { + ASSERT((pp != PP) || constant_pool_allowed()); + ASSERT(src != pp); + Operand op; + // PP is _un_tagged on ARM64. + const uint32_t offset = target::ObjectPool::element_offset(index); + const uint32_t upper20 = offset & 0xfffff000; + if (Address::CanHoldOffset(offset)) { + str(src, Address(pp, offset)); + } else if (Operand::CanHold(upper20, kXRegSizeInBits, &op) == + Operand::Immediate) { + const uint32_t lower12 = offset & 0x00000fff; + ASSERT(Address::CanHoldOffset(lower12)); + add(TMP, pp, op); + str(src, Address(TMP, lower12)); + } else { + const uint16_t offset_low = Utils::Low16Bits(offset); + const uint16_t offset_high = Utils::High16Bits(offset); + movz(TMP, Immediate(offset_low), 0); + if (offset_high != 0) { + movk(TMP, Immediate(offset_high), 1); + } + str(src, Address(pp, TMP)); + } +} + void Assembler::LoadDoubleWordFromPoolIndex(Register lower, Register upper, intptr_t index) { diff --git a/runtime/vm/compiler/assembler/assembler_arm64.h b/runtime/vm/compiler/assembler/assembler_arm64.h index 204c1b42b423..91a9fcb2922b 100644 --- a/runtime/vm/compiler/assembler/assembler_arm64.h +++ b/runtime/vm/compiler/assembler/assembler_arm64.h @@ -2173,6 +2173,11 @@ class Assembler : public AssemblerBase { // Note: the function never clobbers TMP, TMP2 scratch registers. void LoadWordFromPoolIndex(Register dst, intptr_t index, Register pp = PP); + // Store word to pool at the given offset. + // + // Note: clobbers TMP. 
+ void StoreWordToPoolIndex(Register src, intptr_t index, Register pp = PP); + void LoadDoubleWordFromPoolIndex(Register lower, Register upper, intptr_t index); diff --git a/runtime/vm/compiler/assembler/assembler_base.cc b/runtime/vm/compiler/assembler/assembler_base.cc index 1c9cc91b2484..9d738e674829 100644 --- a/runtime/vm/compiler/assembler/assembler_base.cc +++ b/runtime/vm/compiler/assembler/assembler_base.cc @@ -381,11 +381,12 @@ intptr_t ObjectPoolBuilder::AddObject( return AddObject(ObjectPoolBuilderEntry(&obj, patchable, snapshot_behavior)); } -intptr_t ObjectPoolBuilder::AddImmediate(uword imm) { - return AddObject( - ObjectPoolBuilderEntry(imm, ObjectPoolBuilderEntry::kImmediate, - ObjectPoolBuilderEntry::kNotPatchable, - ObjectPoolBuilderEntry::kSnapshotable)); +intptr_t ObjectPoolBuilder::AddImmediate( + uword imm, + ObjectPoolBuilderEntry::Patchability patchable, + ObjectPoolBuilderEntry::SnapshotBehavior snapshotability) { + return AddObject(ObjectPoolBuilderEntry( + imm, ObjectPoolBuilderEntry::kImmediate, patchable, snapshotability)); } intptr_t ObjectPoolBuilder::AddImmediate64(uint64_t imm) { diff --git a/runtime/vm/compiler/assembler/assembler_riscv.cc b/runtime/vm/compiler/assembler/assembler_riscv.cc index 06930ed38236..703a477f307d 100644 --- a/runtime/vm/compiler/assembler/assembler_riscv.cc +++ b/runtime/vm/compiler/assembler/assembler_riscv.cc @@ -3713,6 +3713,24 @@ void Assembler::LoadWordFromPoolIndex(Register dst, } } +void Assembler::StoreWordToPoolIndex(Register src, + intptr_t index, + Register pp) { + ASSERT((pp != PP) || constant_pool_allowed()); + ASSERT(src != pp); + const uint32_t offset = target::ObjectPool::element_offset(index); + // PP is untagged. + intx_t lo = ImmLo(offset); + intx_t hi = ImmHi(offset); + if (hi == 0) { + sx(src, Address(pp, lo)); + } else { + lui(TMP, hi); + add(TMP, TMP, pp); + sx(src, Address(TMP, lo)); + } +} + void Assembler::CompareObject(Register reg, const Object& object) { ASSERT(IsOriginalObject(object)); if (IsSameObject(compiler::NullObject(), object)) { diff --git a/runtime/vm/compiler/assembler/assembler_riscv.h b/runtime/vm/compiler/assembler/assembler_riscv.h index 9babdc47e3b3..57632941e2b7 100644 --- a/runtime/vm/compiler/assembler/assembler_riscv.h +++ b/runtime/vm/compiler/assembler/assembler_riscv.h @@ -1370,6 +1370,11 @@ class Assembler : public MicroAssembler { // Note: the function never clobbers TMP, TMP2 scratch registers. void LoadWordFromPoolIndex(Register dst, intptr_t index, Register pp = PP); + // Store word to pool at the given offset. + // + // Note: clobbers TMP, does not clobber TMP2. + void StoreWordToPoolIndex(Register src, intptr_t index, Register pp = PP); + void PushObject(const Object& object) { if (IsSameObject(compiler::NullObject(), object)) { PushRegister(NULL_REG); diff --git a/runtime/vm/compiler/assembler/assembler_x64.cc b/runtime/vm/compiler/assembler/assembler_x64.cc index fae91c384c82..ede2cff9a8b2 100644 --- a/runtime/vm/compiler/assembler/assembler_x64.cc +++ b/runtime/vm/compiler/assembler/assembler_x64.cc @@ -1325,10 +1325,14 @@ void Assembler::LoadWordFromPoolIndex(Register dst, intptr_t idx) { ASSERT(constant_pool_allowed()); ASSERT(dst != PP); // PP is tagged on X64. - const int32_t offset = - target::ObjectPool::element_offset(idx) - kHeapObjectTag; - // This sequence must be decodable by code_patcher_x64.cc. 
- movq(dst, Address(PP, offset)); + movq(dst, FieldAddress(PP, target::ObjectPool::element_offset(idx))); +} + +void Assembler::StoreWordToPoolIndex(Register src, intptr_t idx) { + ASSERT(constant_pool_allowed()); + ASSERT(src != PP); + // PP is tagged on X64. + movq(FieldAddress(PP, target::ObjectPool::element_offset(idx)), src); } void Assembler::LoadInt64FromBoxOrSmi(Register result, Register value) { diff --git a/runtime/vm/compiler/assembler/assembler_x64.h b/runtime/vm/compiler/assembler/assembler_x64.h index e12a0b61c95c..2a0f7d66b897 100644 --- a/runtime/vm/compiler/assembler/assembler_x64.h +++ b/runtime/vm/compiler/assembler/assembler_x64.h @@ -1477,6 +1477,9 @@ class Assembler : public AssemblerBase { static bool IsSafe(const Object& object) { return true; } static bool IsSafeSmi(const Object& object) { return target::IsSmi(object); } + void LoadWordFromPoolIndex(Register dst, intptr_t index); + void StoreWordToPoolIndex(Register src, intptr_t index); + private: bool constant_pool_allowed_; @@ -1487,7 +1490,6 @@ class Assembler : public AssemblerBase { bool is_unique, ObjectPoolBuilderEntry::SnapshotBehavior snapshot_behavior = ObjectPoolBuilderEntry::kSnapshotable); - void LoadWordFromPoolIndex(Register dst, intptr_t index); void AluL(uint8_t modrm_opcode, Register dst, const Immediate& imm); void AluB(uint8_t modrm_opcode, const Address& dst, const Immediate& imm); diff --git a/runtime/vm/compiler/assembler/object_pool_builder.h b/runtime/vm/compiler/assembler/object_pool_builder.h index 4d410aa50a9f..8363c31fab09 100644 --- a/runtime/vm/compiler/assembler/object_pool_builder.h +++ b/runtime/vm/compiler/assembler/object_pool_builder.h @@ -44,6 +44,9 @@ struct ObjectPoolBuilderEntry { // (`ic_data`, [kImmediate] `entrypoint`) in the object pool instead on // deserialization. kResetToSwitchableCallMissEntryPoint, + + // Set the value to 0 on snapshot writing. + kSetToZero, }; enum EntryType { @@ -228,7 +231,12 @@ class ObjectPoolBuilder : public ValueObject { ObjectPoolBuilderEntry::kNotPatchable, ObjectPoolBuilderEntry::SnapshotBehavior snapshot_behavior = ObjectPoolBuilderEntry::kSnapshotable); - intptr_t AddImmediate(uword imm); + intptr_t AddImmediate( + uword imm, + ObjectPoolBuilderEntry::Patchability patchable = + ObjectPoolBuilderEntry::kNotPatchable, + ObjectPoolBuilderEntry::SnapshotBehavior snapshotability = + ObjectPoolBuilderEntry::kSnapshotable); intptr_t AddImmediate64(uint64_t imm); intptr_t AddImmediate128(simd128_value_t imm); diff --git a/runtime/vm/compiler/backend/constant_propagator.cc b/runtime/vm/compiler/backend/constant_propagator.cc index 42225e74d881..e425a605c30e 100644 --- a/runtime/vm/compiler/backend/constant_propagator.cc +++ b/runtime/vm/compiler/backend/constant_propagator.cc @@ -546,6 +546,12 @@ void ConstantPropagator::VisitStaticCall(StaticCallInstr* instr) { SetValue(instr, non_constant_); } +void ConstantPropagator::VisitCachableIdempotentCall( + CachableIdempotentCallInstr* instr) { + // This instruction should not be inserted if its value is constant. + SetValue(instr, non_constant_); +} + void ConstantPropagator::VisitLoadLocal(LoadLocalInstr* instr) { // Instruction is eliminated when translating to SSA. 
UNREACHABLE(); diff --git a/runtime/vm/compiler/backend/flow_graph.cc b/runtime/vm/compiler/backend/flow_graph.cc index 9c37c88c3c07..1688efe82d23 100644 --- a/runtime/vm/compiler/backend/flow_graph.cc +++ b/runtime/vm/compiler/backend/flow_graph.cc @@ -2555,6 +2555,10 @@ void FlowGraph::EliminateEnvironments() { } for (ForwardInstructionIterator it(block); !it.Done(); it.Advance()) { Instruction* current = it.Current(); + // This check is inconsistent with the flow graph checker. The flow graph + // checker does not allow for not having an env if the block is not + // inside a try-catch. + // See FlowGraphChecker::VisitInstruction. if (!current->ComputeCanDeoptimize() && !current->ComputeCanDeoptimizeAfterCall() && (!current->MayThrow() || !current->GetBlock()->InsideTryBlock())) { diff --git a/runtime/vm/compiler/backend/flow_graph_checker.cc b/runtime/vm/compiler/backend/flow_graph_checker.cc index 7cce0d5ffa8f..92ae6a4fde58 100644 --- a/runtime/vm/compiler/backend/flow_graph_checker.cc +++ b/runtime/vm/compiler/backend/flow_graph_checker.cc @@ -244,7 +244,9 @@ void FlowGraphChecker::VisitInstruction(Instruction* instruction) { #if !defined(DART_PRECOMPILER) // In JIT mode, any instruction which may throw must have a deopt-id, except // tail-call because it replaces the stack frame. - ASSERT1(!instruction->MayThrow() || instruction->IsTailCall() || + ASSERT1(!instruction->MayThrow() || + !instruction->GetBlock()->InsideTryBlock() || + instruction->IsTailCall() || instruction->deopt_id() != DeoptId::kNone, instruction); @@ -513,11 +515,12 @@ void FlowGraphChecker::AssertArgumentsInEnv(Definition* call) { call); } else { if (env->LazyDeoptToBeforeDeoptId()) { - // The deoptimization environment attached to this [call] instruction may - // no longer target the same call in unoptimized code. It may target anything. + // The deoptimization environment attached to this [call] instruction + // may no longer target the same call in unoptimized code. It may + // target anything. // - // As a result, we cannot assume the arguments we pass to the call will also be - // in the deopt environment. + // As a result, we cannot assume the arguments we pass to the call + // will also be in the deopt environment. // // This currently can happen in inlined force-optimized instructions. ASSERT(call->inlining_id() > 0); diff --git a/runtime/vm/compiler/backend/flow_graph_compiler.cc b/runtime/vm/compiler/backend/flow_graph_compiler.cc index b99f93f90feb..8b21683220cb 100644 --- a/runtime/vm/compiler/backend/flow_graph_compiler.cc +++ b/runtime/vm/compiler/backend/flow_graph_compiler.cc @@ -472,8 +472,7 @@ void FlowGraphCompiler::EmitCallsiteMetadata(const InstructionSource& source, if ((deopt_id != DeoptId::kNone) && !FLAG_precompiled_mode) { // Marks either the continuation point in unoptimized code or the // deoptimization point in optimized code, after call. - if (is_optimizing()) { - ASSERT(env != nullptr); + if (env != nullptr) { // Note that we may lazy-deopt to the same IR instruction in unoptimized // code or to another IR instruction (e.g. if LICM hoisted an instruction // it will lazy-deopt to a Goto). @@ -484,7 +483,6 @@ void FlowGraphCompiler::EmitCallsiteMetadata(const InstructionSource& source, : DeoptId::ToDeoptAfter(deopt_id); AddDeoptIndexAtCall(dest_deopt_id, env); } else { - ASSERT(env == nullptr); const intptr_t deopt_id_after = DeoptId::ToDeoptAfter(deopt_id); // Add deoptimization continuation point after the call and before the // arguments are removed. 
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler.h b/runtime/vm/compiler/backend/flow_graph_compiler.h index 736f7b2b3e5e..969d2a3c6b86 100644 --- a/runtime/vm/compiler/backend/flow_graph_compiler.h +++ b/runtime/vm/compiler/backend/flow_graph_compiler.h @@ -923,6 +923,15 @@ class FlowGraphCompiler : public ValueObject { bool IsEmptyBlock(BlockEntryInstr* block) const; + void EmitOptimizedStaticCall( + const Function& function, + const Array& arguments_descriptor, + intptr_t size_with_type_args, + intptr_t deopt_id, + const InstructionSource& source, + LocationSummary* locs, + Code::EntryKind entry_kind = Code::EntryKind::kNormal); + private: friend class BoxInt64Instr; // For AddPcRelativeCallStubTarget(). friend class CheckNullInstr; // For AddPcRelativeCallStubTarget(). @@ -931,7 +940,7 @@ class FlowGraphCompiler : public ValueObject { friend class StoreIndexedInstr; // For AddPcRelativeCallStubTarget(). friend class StoreFieldInstr; // For AddPcRelativeCallStubTarget(). friend class CheckStackOverflowSlowPath; // For pending_deoptimization_env_. - friend class GraphIntrinsicCodeGenScope; // For optimizing_. + friend class GraphIntrinsicCodeGenScope; // For optimizing_. // Architecture specific implementation of simple native moves. void EmitNativeMoveArchitecture(const compiler::ffi::NativeLocation& dst, @@ -956,15 +965,6 @@ class FlowGraphCompiler : public ValueObject { // Emit code to load a Value into register 'dst'. void LoadValue(Register dst, Value* value); - void EmitOptimizedStaticCall( - const Function& function, - const Array& arguments_descriptor, - intptr_t size_with_type_args, - intptr_t deopt_id, - const InstructionSource& source, - LocationSummary* locs, - Code::EntryKind entry_kind = Code::EntryKind::kNormal); - void EmitUnoptimizedStaticCall( intptr_t size_with_type_args, intptr_t deopt_id, diff --git a/runtime/vm/compiler/backend/il.cc b/runtime/vm/compiler/backend/il.cc index 782e49999ffa..a99cef7dba87 100644 --- a/runtime/vm/compiler/backend/il.cc +++ b/runtime/vm/compiler/backend/il.cc @@ -5,9 +5,12 @@ #include "vm/compiler/backend/il.h" #include "platform/assert.h" +#include "platform/globals.h" #include "vm/bit_vector.h" #include "vm/bootstrap.h" +#include "vm/code_entry_kind.h" #include "vm/compiler/aot/dispatch_table_generator.h" +#include "vm/compiler/assembler/object_pool_builder.h" #include "vm/compiler/backend/code_statistics.h" #include "vm/compiler/backend/constant_propagator.h" #include "vm/compiler/backend/evaluator.h" @@ -5858,6 +5861,71 @@ void StaticCallInstr::EmitNativeCode(FlowGraphCompiler* compiler) { } } +Representation CachableIdempotentCallInstr::RequiredInputRepresentation( + intptr_t idx) const { + // The first input is the array of types for generic functions. + if (type_args_len() > 0 || function().IsFactory()) { + if (idx == 0) { + return kTagged; + } + idx--; + } + return FlowGraph::ParameterRepresentationAt(function(), idx); +} + +intptr_t CachableIdempotentCallInstr::ArgumentsSize() const { + return FlowGraph::ParameterOffsetAt(function(), + ArgumentCountWithoutTypeArgs(), + /*last_slot=*/false) + + ((type_args_len() > 0) ? 
1 : 0); +} + +Definition* CachableIdempotentCallInstr::Canonicalize(FlowGraph* flow_graph) { + return this; +} + +LocationSummary* CachableIdempotentCallInstr::MakeLocationSummary( + Zone* zone, + bool optimizing) const { + return MakeCallSummary(zone, this); +} + +void CachableIdempotentCallInstr::EmitNativeCode(FlowGraphCompiler* compiler) { +#if !defined(TARGET_ARCH_IA32) + Zone* zone = compiler->zone(); + compiler::Label done; + const intptr_t cacheable_pool_index = __ object_pool_builder().AddImmediate( + 0, compiler::ObjectPoolBuilderEntry::kPatchable, + compiler::ObjectPoolBuilderEntry::kSetToZero); + const Register dst = locs()->out(0).reg(); + + __ Comment( + "CachableIdempotentCall pool load and check. pool_index = " + "%" Pd, + cacheable_pool_index); + __ LoadWordFromPoolIndex(dst, cacheable_pool_index); + __ CompareImmediate(dst, 0); + __ BranchIf(NOT_EQUAL, &done); + __ Comment("CachableIdempotentCall pool load and check - end"); + + ArgumentsInfo args_info(type_args_len(), ArgumentCount(), ArgumentsSize(), + argument_names()); + const Array& arguments_descriptor = + Array::ZoneHandle(zone, args_info.ToArgumentsDescriptor()); + compiler->EmitOptimizedStaticCall(function(), arguments_descriptor, + args_info.size_with_type_args, deopt_id(), + source(), locs(), CodeEntryKind::kNormal); + + __ Comment("CachableIdempotentCall pool store"); + if (!function().HasUnboxedReturnValue()) { + __ LoadWordFromBoxOrSmi(dst, dst); + } + __ StoreWordToPoolIndex(dst, cacheable_pool_index); + __ Comment("CachableIdempotentCall pool store - end"); + __ Bind(&done); +#endif +} + intptr_t AssertAssignableInstr::statistics_tag() const { switch (kind_) { case kParameterCheck: diff --git a/runtime/vm/compiler/backend/il.h b/runtime/vm/compiler/backend/il.h index d87a25985813..a82590442eb1 100644 --- a/runtime/vm/compiler/backend/il.h +++ b/runtime/vm/compiler/backend/il.h @@ -442,6 +442,7 @@ struct InstrAttrs { M(PolymorphicInstanceCall, _) \ M(DispatchTableCall, _) \ M(StaticCall, _) \ + M(CachableIdempotentCall, _) \ M(LoadLocal, kNoGC) \ M(DropTemps, kNoGC) \ M(MakeTemp, kNoGC) \ @@ -5643,6 +5644,94 @@ class StaticCallInstr : public TemplateDartCall<0> { DISALLOW_COPY_AND_ASSIGN(StaticCallInstr); }; +// A call to a function which has no side effects and of which the result can +// be cached. +// +// The arguments flowing into this call must be const. +// +// The result is cached in the pool. Hence this instruction is not supported +// on IA32. +class CachableIdempotentCallInstr : public TemplateDartCall<0> { + public: + CachableIdempotentCallInstr(const InstructionSource& source, + const Function& function, + intptr_t type_args_len, + const Array& argument_names, + InputsArray&& arguments, + intptr_t deopt_id) + : TemplateDartCall(deopt_id, + type_args_len, + argument_names, + std::move(arguments), + source), + function_(function), + identity_(AliasIdentity::Unknown()) { + DEBUG_ASSERT(function.IsNotTemporaryScopedHandle()); + ASSERT(AbstractType::Handle(function.result_type()).IsIntType()); + ASSERT(!function.IsNull()); +#if defined(TARGET_ARCH_IA32) + // No pool to cache in on IA32. 
+ FATAL("Not supported on IA32."); +#endif + } + + DECLARE_INSTRUCTION(CachableIdempotentCall) + + const Function& function() const { return function_; } + + virtual CompileType ComputeType() const { return CompileType::Int(); } + + virtual Definition* Canonicalize(FlowGraph* flow_graph); + + virtual bool ComputeCanDeoptimize() const { return false; } + + virtual bool ComputeCanDeoptimizeAfterCall() const { return false; } + + virtual bool CanBecomeDeoptimizationTarget() const { return false; } + + virtual bool HasUnknownSideEffects() const { return true; } + + virtual bool CanCallDart() const { return true; } + + virtual SpeculativeMode SpeculativeModeOfInput(intptr_t idx) const { + if (type_args_len() > 0) { + if (idx == 0) { + return kGuardInputs; + } + idx--; + } + return function_.is_unboxed_parameter_at(idx) ? kNotSpeculative + : kGuardInputs; + } + + virtual intptr_t ArgumentsSize() const; + + virtual Representation RequiredInputRepresentation(intptr_t idx) const; + + virtual Representation representation() const { + // If other representations are supported in the future, the location + // summary needs to be updated as well to stay consistent with static calls. + return kUnboxedFfiIntPtr; + } + + virtual AliasIdentity Identity() const { return identity_; } + virtual void SetIdentity(AliasIdentity identity) { identity_ = identity; } + + PRINT_OPERANDS_TO_SUPPORT + +#define FIELD_LIST(F) \ + F(const Function&, function_) \ + F(AliasIdentity, identity_) + + DECLARE_INSTRUCTION_SERIALIZABLE_FIELDS(CachableIdempotentCallInstr, + TemplateDartCall, + FIELD_LIST) +#undef FIELD_LIST + + private: + DISALLOW_COPY_AND_ASSIGN(CachableIdempotentCallInstr); +}; + class LoadLocalInstr : public TemplateDefinition<0, NoThrow> { public: LoadLocalInstr(const LocalVariable& local, const InstructionSource& source) diff --git a/runtime/vm/compiler/backend/il_arm.cc b/runtime/vm/compiler/backend/il_arm.cc index 4c3d81d11731..9e630e87553c 100644 --- a/runtime/vm/compiler/backend/il_arm.cc +++ b/runtime/vm/compiler/backend/il_arm.cc @@ -46,6 +46,8 @@ LocationSummary* Instruction::MakeCallSummary(Zone* zone, const auto representation = instr->representation(); switch (representation) { case kTagged: + case kUnboxedUint32: + case kUnboxedInt32: result->set_out( 0, Location::RegisterLocation(CallingConventions::kReturnReg)); break; diff --git a/runtime/vm/compiler/backend/il_printer.cc b/runtime/vm/compiler/backend/il_printer.cc index fa6a6c10b4d9..26a7870912b9 100644 --- a/runtime/vm/compiler/backend/il_printer.cc +++ b/runtime/vm/compiler/backend/il_printer.cc @@ -904,6 +904,15 @@ void StaticCallInstr::PrintOperandsTo(BaseTextBuffer* f) const { } } +void CachableIdempotentCallInstr::PrintOperandsTo(BaseTextBuffer* f) const { + f->Printf(" %s<%" Pd "> ", String::Handle(function().name()).ToCString(), + type_args_len()); + for (intptr_t i = 0; i < ArgumentCount(); ++i) { + if (i > 0) f->AddString(", "); + ArgumentValueAt(i)->PrintTo(f); + } +} + void LoadLocalInstr::PrintOperandsTo(BaseTextBuffer* f) const { f->Printf("%s @%d", local().name().ToCString(), local().index().value()); } diff --git a/runtime/vm/compiler/backend/il_test.cc b/runtime/vm/compiler/backend/il_test.cc index fd98abf7d6e1..ec2b78d44d1d 100644 --- a/runtime/vm/compiler/backend/il_test.cc +++ b/runtime/vm/compiler/backend/il_test.cc @@ -966,6 +966,105 @@ ISOLATE_UNIT_TEST_CASE(IRTest_LoadThread) { EXPECT_EQ(reinterpret_cast(thread), result_int); } +#if !defined(TARGET_ARCH_IA32) +ISOLATE_UNIT_TEST_CASE(IRTest_CachableIdempotentCall) { + // 
clang-format off + auto kScript = Utils::CStringUniquePtr(OS::SCreate(nullptr, R"( + int globalCounter = 0; + + int increment() => ++globalCounter; + + int cachedIncrement() { + // We will replace this call with a cacheable call, + // which will lead to the counter no longer being incremented. + // Make sure to return the value, so we can see that the boxing and + // unboxing works as expected. + return increment(); + } + + int multipleIncrement() { + int returnValue = 0; + for(int i = 0; i < 10; i++) { + // Save the last returned value. + returnValue = cachedIncrement(); + } + return returnValue; + } + )"), std::free); + // clang-format on + + const auto& root_library = Library::Handle(LoadTestScript(kScript.get())); + const auto& first_result = + Object::Handle(Invoke(root_library, "multipleIncrement")); + EXPECT(first_result.IsSmi()); + if (first_result.IsSmi()) { + const intptr_t int_value = Smi::Cast(first_result).Value(); + EXPECT_EQ(10, int_value); + } + + const auto& cached_increment_function = + Function::Handle(GetFunction(root_library, "cachedIncrement")); + + const auto& increment_function = + Function::ZoneHandle(GetFunction(root_library, "increment")); + + TestPipeline pipeline(cached_increment_function, CompilerPass::kJIT); + FlowGraph* flow_graph = pipeline.RunPasses({ + CompilerPass::kComputeSSA, + }); + + StaticCallInstr* static_call = nullptr; + { + ILMatcher cursor(flow_graph, flow_graph->graph_entry()->normal_entry()); + + EXPECT(cursor.TryMatch({ + kMoveGlob, + {kMatchAndMoveStaticCall, &static_call}, + kMoveGlob, + kMatchReturn, + })); + } + + InputsArray args; + CachableIdempotentCallInstr* call = new CachableIdempotentCallInstr( + InstructionSource(), increment_function, static_call->type_args_len(), + Array::empty_array(), std::move(args), DeoptId::kNone); + static_call->ReplaceWith(call, nullptr); + + pipeline.RunForcedOptimizedAfterSSAPasses(); + + { + ILMatcher cursor(flow_graph, flow_graph->graph_entry()->normal_entry()); + + EXPECT(cursor.TryMatch({ + kMoveGlob, + kMatchAndMoveCachableIdempotentCall, + kMoveGlob, + // The cacheable call returns unboxed, so select representations + // adds boxing. + kMatchBox, + kMoveGlob, + kMatchReturn, + })); + } + + { +#if !defined(PRODUCT) + SetFlagScope sfs(&FLAG_disassemble_optimized, true); +#endif + pipeline.CompileGraphAndAttachFunction(); + } + + const auto& second_result = + Object::Handle(Invoke(root_library, "multipleIncrement")); + EXPECT(second_result.IsSmi()); + if (second_result.IsSmi()) { + const intptr_t int_value = Smi::Cast(second_result).Value(); + EXPECT_EQ(11, int_value); + } +} +#endif + // Helper to set up an inlined FfiCall by replacing a StaticCall. FlowGraph* SetupFfiFlowgraph(TestPipeline* pipeline, Zone* zone, diff --git a/runtime/vm/compiler/frontend/kernel_binary_flowgraph.cc b/runtime/vm/compiler/frontend/kernel_binary_flowgraph.cc index c144eb2c469f..1419da67d9a3 100644 --- a/runtime/vm/compiler/frontend/kernel_binary_flowgraph.cc +++ b/runtime/vm/compiler/frontend/kernel_binary_flowgraph.cc @@ -10,6 +10,7 @@ #include "vm/compiler/frontend/flow_graph_builder.h" // For dart::FlowGraphBuilder::SimpleInstanceOfType. 
#include "vm/compiler/frontend/prologue_builder.h" #include "vm/compiler/jit/compiler.h" +#include "vm/kernel_binary.h" #include "vm/object_store.h" #include "vm/resolver.h" #include "vm/stack_frame.h" @@ -3333,6 +3334,10 @@ Fragment StreamingFlowGraphBuilder::BuildStaticInvocation(TokenPosition* p) { ++argument_count; } + if (target.IsCachableIdempotent()) { + return BuildCachableIdempotentCall(position, target); + } + const auto recognized_kind = target.recognized_kind(); switch (recognized_kind) { case MethodRecognizer::kNativeEffect: @@ -6234,6 +6239,66 @@ Fragment StreamingFlowGraphBuilder::BuildFfiAsFunctionInternal() { return code; } +Fragment StreamingFlowGraphBuilder::BuildArgumentsCachableIdempotentCall( + intptr_t* argument_count) { + *argument_count = ReadUInt(); // read arguments count. + + // List of types. + const intptr_t types_list_length = ReadListLength(); + if (types_list_length != 0) { + FATAL("Type arguments for vm:cachable-idempotent not (yet) supported."); + } + + Fragment code; + // List of positional. + intptr_t positional_list_length = ReadListLength(); + for (intptr_t i = 0; i < positional_list_length; ++i) { + code += BuildExpression(); + Definition* target_def = B->Peek(); + if (!target_def->IsConstant()) { + FATAL( + "Arguments for vm:cachable-idempotent must be const, argument on " + "index %" Pd " is not.", + i); + } + } + + // List of named. + const intptr_t named_args_len = ReadListLength(); + if (named_args_len != 0) { + FATAL("Named arguments for vm:cachable-idempotent not (yet) supported."); + } + + return code; +} + +Fragment StreamingFlowGraphBuilder::BuildCachableIdempotentCall( + TokenPosition position, + const Function& target) { + // The call site must me fore optimized because the cache is untagged. + if (!parsed_function()->function().ForceOptimize()) { + FATAL( + "vm:cachable-idempotent functions can only be called from " + "vm:force-optimize functions."); + } + const auto& target_result_type = AbstractType::Handle(target.result_type()); + if (!target_result_type.IsIntType()) { + FATAL("The return type vm:cachable-idempotent functions must be int.") + } + + Fragment code; + Array& argument_names = Array::ZoneHandle(Z); + intptr_t argument_count; + code += BuildArgumentsCachableIdempotentCall(&argument_count); + + code += flow_graph_builder_->CachableIdempotentCall( + position, target, argument_count, argument_names, + /*type_args_len=*/0); + code += flow_graph_builder_->Box(kUnboxedFfiIntPtr); + + return code; +} + Fragment StreamingFlowGraphBuilder::BuildFfiNativeCallbackFunction( FfiFunctionKind kind) { // The call-site must look like this (guaranteed by the FE which inserts it): diff --git a/runtime/vm/compiler/frontend/kernel_binary_flowgraph.h b/runtime/vm/compiler/frontend/kernel_binary_flowgraph.h index ce5ceeb51709..f4e378501916 100644 --- a/runtime/vm/compiler/frontend/kernel_binary_flowgraph.h +++ b/runtime/vm/compiler/frontend/kernel_binary_flowgraph.h @@ -395,6 +395,10 @@ class StreamingFlowGraphBuilder : public KernelReaderHelper { // Kernel buffer and pushes the resulting Function object. Fragment BuildFfiNativeCallbackFunction(FfiFunctionKind kind); + Fragment BuildArgumentsCachableIdempotentCall(intptr_t* argument_count); + Fragment BuildCachableIdempotentCall(TokenPosition position, + const Function& target); + // Piece of a StringConcatenation. // Represents either a StringLiteral, or a Reader offset to the expression. 
struct ConcatPiece { diff --git a/runtime/vm/compiler/frontend/kernel_to_il.cc b/runtime/vm/compiler/frontend/kernel_to_il.cc index 33f427746246..17893ddc53c3 100644 --- a/runtime/vm/compiler/frontend/kernel_to_il.cc +++ b/runtime/vm/compiler/frontend/kernel_to_il.cc @@ -658,6 +658,20 @@ Fragment FlowGraphBuilder::StaticCall(TokenPosition position, return Fragment(call); } +Fragment FlowGraphBuilder::CachableIdempotentCall(TokenPosition position, + const Function& target, + intptr_t argument_count, + const Array& argument_names, + intptr_t type_args_count) { + const intptr_t total_count = argument_count + (type_args_count > 0 ? 1 : 0); + InputsArray arguments = GetArguments(total_count); + CachableIdempotentCallInstr* call = new (Z) CachableIdempotentCallInstr( + InstructionSource(position), target, type_args_count, argument_names, + std::move(arguments), GetNextDeoptId()); + Push(call); + return Fragment(call); +} + Fragment FlowGraphBuilder::StringInterpolateSingle(TokenPosition position) { Fragment instructions; instructions += StaticCall( diff --git a/runtime/vm/compiler/frontend/kernel_to_il.h b/runtime/vm/compiler/frontend/kernel_to_il.h index 702f7b9b1b9b..1eb49f428f1b 100644 --- a/runtime/vm/compiler/frontend/kernel_to_il.h +++ b/runtime/vm/compiler/frontend/kernel_to_il.h @@ -222,6 +222,11 @@ class FlowGraphBuilder : public BaseFlowGraphBuilder { const InferredTypeMetadata* result_type = nullptr, intptr_t type_args_len = 0, bool use_unchecked_entry = false); + Fragment CachableIdempotentCall(TokenPosition position, + const Function& target, + intptr_t argument_count, + const Array& argument_names, + intptr_t type_args_len = 0); Fragment StringInterpolateSingle(TokenPosition position); Fragment StringInterpolate(TokenPosition position); Fragment ThrowTypeError(); diff --git a/runtime/vm/object.cc b/runtime/vm/object.cc index 4ca418007c2d..13b6c522649b 100644 --- a/runtime/vm/object.cc +++ b/runtime/vm/object.cc @@ -9066,22 +9066,35 @@ bool Function::IsUnmodifiableTypedDataViewFactory() const { } } +static bool InVmTests(const Function& function) { +#if defined(TESTING) + return true; +#else + auto* zone = Thread::Current()->zone(); + const auto& cls = Class::Handle(zone, function.Owner()); + const auto& lib = Library::Handle(zone, cls.library()); + const auto& url = String::Handle(zone, lib.url()); + const bool in_vm_tests = + strstr(url.ToCString(), "runtime/tests/vm/") != nullptr; + return in_vm_tests; +#endif +} + bool Function::ForceOptimize() const { if (RecognizedKindForceOptimize() || IsFfiTrampoline() || IsTypedDataViewFactory() || IsUnmodifiableTypedDataViewFactory()) { return true; } -#if defined(TESTING) - // For run_vm_tests we allow marking arbitrary functions as force-optimize - // via `@pragma('vm:force-optimize')`. - if (has_pragma()) { - return Library::FindPragma(Thread::Current(), false, *this, - Symbols::vm_force_optimize()); - } -#endif // defined(TESTING) + if (!has_pragma()) return false; - return false; + const bool has_vm_pragma = Library::FindPragma( + Thread::Current(), false, *this, Symbols::vm_force_optimize()); + if (!has_vm_pragma) return false; + + // For run_vm_tests and runtime/tests/vm allow marking arbitrary functions as + // force-optimize via `@pragma('vm:force-optimize')`. 
+ return InVmTests(*this); } bool Function::IsIdempotent() const { @@ -9097,6 +9110,18 @@ bool Function::IsIdempotent() const { *this, Symbols::vm_idempotent()); } +bool Function::IsCachableIdempotent() const { + if (!has_pragma()) return false; + + const bool has_vm_pragma = + Library::FindPragma(Thread::Current(), /*only_core=*/false, *this, + Symbols::vm_cachable_idempotent()); + if (!has_vm_pragma) return false; + + // For run_vm_tests and runtime/tests/vm allow marking arbitrary functions. + return InVmTests(*this); +} + bool Function::RecognizedKindForceOptimize() const { switch (recognized_kind()) { // Uses unboxed/untagged data not supported in unoptimized. diff --git a/runtime/vm/object.h b/runtime/vm/object.h index 423b1515eea6..57110f115b6c 100644 --- a/runtime/vm/object.h +++ b/runtime/vm/object.h @@ -3577,6 +3577,8 @@ class Function : public Object { // and retry it again. bool IsIdempotent() const; + bool IsCachableIdempotent() const; + // Whether this function's |recognized_kind| requires optimization. bool RecognizedKindForceOptimize() const; diff --git a/runtime/vm/symbols.h b/runtime/vm/symbols.h index 7c6e7af4f540..586da182bb05 100644 --- a/runtime/vm/symbols.h +++ b/runtime/vm/symbols.h @@ -531,6 +531,7 @@ class ObjectPointerVisitor; V(vm_idempotent, "vm:idempotent") \ V(vm_invisible, "vm:invisible") \ V(vm_isolate_unsendable, "vm:isolate-unsendable") \ + V(vm_cachable_idempotent, "vm:cachable-idempotent") \ V(vm_never_inline, "vm:never-inline") \ V(vm_non_nullable_result_type, "vm:non-nullable-result-type") \ V(vm_notify_debugger_on_exception, "vm:notify-debugger-on-exception") \
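
Illustrative usage (not part of the diff): a minimal sketch of how the two new pragmas are intended to be combined, modeled on runtime/tests/vm/dart/cachable_idempotent_test.dart above. The function names here are hypothetical and serve only as an example.

@pragma('vm:never-inline')
@pragma('vm:cachable-idempotent')
int expensiveLookup(int key) {
  // Stand-in for an expensive, side-effect-free computation whose int result
  // may be computed once and reused.
  return key * 2;
}

@pragma('vm:force-optimize')
int cachedLookup() {
  // This call site is lowered to a CachableIdempotentCall: the first execution
  // stores the untagged int result in the caller's object pool slot, and later
  // executions load the cached value instead of calling expensiveLookup again.
  // The argument must be const and the callee must return an int.
  return expensiveLookup(/*must be const*/ 21);
}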