diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ff2c6e2b..13d94eb2 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -5,6 +5,7 @@ on: push: branches: - main + - debug tags: - v* @@ -34,20 +35,3 @@ jobs: with: name: wheel path: wheelhouse/ - - publish: - runs-on: ubuntu-latest - needs: [release] - steps: - - uses: actions/download-artifact@v3 - with: - path: artifact - - name: Move files so the next action can find them - run: | - mkdir dist && mv artifact/wheel/* dist/ - ls dist/ - - name: Publish distribution to PyPI - if: startsWith(github.ref, 'refs/tags') - uses: pypa/gh-action-pypi-publish@release/v1 - with: - password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/envpool/core/xla.h b/envpool/core/xla.h index e70fbb98..f4d9afff 100644 --- a/envpool/core/xla.h +++ b/envpool/core/xla.h @@ -74,8 +74,8 @@ Array GpuBufferToArray(cudaStream_t stream, const void* buffer, spec = spec.Batch(batch_size); } Array ret(spec); - cudaMemcpy(ret.Data(), buffer, ret.size * ret.element_size, - cudaMemcpyDeviceToHost); + cudaMemcpyAsync(ret.Data(), buffer, ret.size * ret.element_size, + cudaMemcpyDeviceToHost, stream); return ret; } @@ -161,6 +161,7 @@ struct XlaSend { ...); }, action_spec); + cudaStreamSynchronize(stream); envpool->Send(action); } }; diff --git a/envpool/core/xla_template.h b/envpool/core/xla_template.h index 2da813db..36e4226c 100644 --- a/envpool/core/xla_template.h +++ b/envpool/core/xla_template.h @@ -26,6 +26,7 @@ #include #include #include +#include namespace py = pybind11; @@ -36,6 +37,9 @@ static auto SpecToTuple(const Spec& spec) { template void ToArray(const void** raw, std::array* array) { + for (int j = 0; j < N; ++j) { + assert(raw[j] != nullptr); + } int i = 0; std::apply([&](auto&&... a) { ((a = const_cast(raw[i++])), ...); }, *array); @@ -43,6 +47,9 @@ void ToArray(const void** raw, std::array* array) { template void ToArray(void** raw, std::array* array) { + for (int j = 0; j < N; ++j) { + assert(raw[j] != nullptr); + } int i = 0; std::apply([&](auto&&... a) { ((a = raw[i++]), ...); }, *array); }