diff --git a/runtime/nvqir/custatevec/CuStateVecCircuitSimulator.cpp b/runtime/nvqir/custatevec/CuStateVecCircuitSimulator.cpp index 3f4d09bf76..5f4b2f4801 100644 --- a/runtime/nvqir/custatevec/CuStateVecCircuitSimulator.cpp +++ b/runtime/nvqir/custatevec/CuStateVecCircuitSimulator.cpp @@ -216,7 +216,8 @@ class CuStateVecCircuitSimulator void *newDeviceStateVector; HANDLE_CUDA_ERROR(cudaMalloc((void **)&newDeviceStateVector, stateDimension * sizeof(CudaDataType))); - + HANDLE_CUDA_ERROR(cudaMemset(newDeviceStateVector, 0, + stateDimension * sizeof(CudaDataType))); // Place the state data on device. Could be that // we just need the zero state, or the user could have provided one void *otherState; @@ -283,6 +284,8 @@ class CuStateVecCircuitSimulator void *newDeviceStateVector; HANDLE_CUDA_ERROR(cudaMalloc((void **)&newDeviceStateVector, stateDimension * sizeof(CudaDataType))); + HANDLE_CUDA_ERROR(cudaMemset(newDeviceStateVector, 0, + stateDimension * sizeof(CudaDataType))); constexpr int32_t threads_per_block = 256; uint32_t n_blocks = (stateDimension + threads_per_block - 1) / threads_per_block;