forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTHCTensorMath.cuh
34 lines (30 loc) · 1.12 KB
/
THCTensorMath.cuh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#ifndef THC_TENSORMATH_CUH
#define THC_TENSORMATH_CUH
#include "ATen/cuda/CUDAContext.h"
// Copy the kth diagonal of a matrix B to a vector A.
template <typename T>
#ifdef __HIP_PLATFORM_HCC__
C10_LAUNCH_BOUNDS_1(1024)
#endif
__global__ void THCTensor_copyFromDiagonal(T* a, T* b, ptrdiff_t start, ptrdiff_t size, ptrdiff_t strideSum, ptrdiff_t strideA) {
for (ptrdiff_t linearIndex = blockIdx.x * blockDim.x + threadIdx.x;
linearIndex < size;
linearIndex += gridDim.x * blockDim.x) {
const ptrdiff_t bOffset = start + strideSum * linearIndex;
a[strideA * linearIndex] = b[bOffset];
}
}
// Copy vector B to the kth diagonal of a matrix A
template <typename T>
#ifdef __HIP_PLATFORM_HCC__
C10_LAUNCH_BOUNDS_1(1024)
#endif
__global__ void THCTensor_copyToDiagonal(T* a, T* b, ptrdiff_t start, ptrdiff_t size, ptrdiff_t strideSum, ptrdiff_t strideB) {
for (ptrdiff_t linearIndex = blockIdx.x * blockDim.x + threadIdx.x;
linearIndex < size;
linearIndex += gridDim.x * blockDim.x) {
const ptrdiff_t aOffset = start + strideSum * linearIndex;
a[aOffset] = b[strideB * linearIndex];
}
}
#endif