forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFill.cpp
131 lines (107 loc) · 3.57 KB
/
Fill.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
// Functions that fill Tensors with constants.
#include <ATen/ATen.h>
#include <ATen/Dispatch.h>
#include <ATen/native/Fill.h>
#include <ATen/native/TensorIterator.h>
#include <ATen/Utils.h>
#include <c10/util/accumulate.h>
namespace at {
namespace native {
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fill ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Fill `self` with `value` in place. Quantized tensors go through a
// float-fill-then-copy_ path; single-element CPU tensors take a fast scalar
// store; everything else dispatches through the TensorIterator fill kernel.
Tensor& fill_out(Tensor& self, const Scalar& value) {
  if (self.is_quantized()) {
    // Materialize a float tensor holding `value` everywhere, then let copy_
    // handle the quantization and the boundary checks.
    at::Tensor filled = at::ones(self.sizes()).to(kFloat) * value;
    filled = filled.to(self.device());
    self.copy_(filled);
    return self;
  }
  // Fast path: one element on CPU can be written without an iterator.
  if (self.device() == at::kCPU && self.numel() == 1) {
    return at::detail::scalar_fill(self, value);
  }
  auto config = TensorIteratorConfig()
      .set_check_mem_overlap(false) // Fill is idempotent, so overlap is okay
      .check_all_same_dtype(false)
      .add_output(self)
      .resize_outputs(false);
  auto iter = config.build();
  fill_stub(iter.device_type(), iter, value);
  return self;
}
// In-place fill of `self` with a scalar `value`; forwards to fill_out.
Tensor& fill_(Tensor& self, const Scalar& value) {
  auto& result = fill_out(self, value);
  return result;
}
// In-place fill of `self` from a 0-dim tensor `value`; the scalar payload is
// extracted with item() and forwarded to fill_out.
Tensor& fill_(Tensor& self, const Tensor& value) {
  const auto value_dim = value.dim();
  TORCH_CHECK(value_dim == 0, "fill_ only supports 0-dimension value tensor but got tensor with ", value_dim, " dimensions.");
  return fill_out(self, value.item());
}
// Meta-backend overload: meta tensors carry no data, so there is nothing to
// fill; `self` is returned unchanged.
Tensor& fill_meta_(Tensor& self, const Scalar& value) {
return self;
}
// Meta-backend overload for a 0-dim tensor `value`: only the shape check is
// performed (mirroring the real overload above); no data exists to fill.
Tensor& fill_meta_(Tensor& self, const Tensor& value) {
TORCH_CHECK(value.dim() == 0, "fill_ only supports 0-dimension value tensor but got tensor with ", value.dim(), " dimensions.");
return self;
}
DEFINE_DISPATCH(fill_stub);
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fill_diagonal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Fill the main diagonal of `self` with `fill_value`, in place.
// Requires self.dim() >= 2; for more than 2 dims every dimension must have
// equal length. With wrap=true on a tall 2-D matrix the diagonal restarts
// below the main block (numpy-style fill_diagonal wrapping).
Tensor& fill_diagonal_(Tensor& self, const Scalar& fill_value, bool wrap) {
  const int64_t ndim = self.dim();
  TORCH_CHECK(ndim >= 2, "dimensions must larger than 1");
  const int64_t height = self.size(0);
  const int64_t width = self.size(1);

  if (ndim > 2) {
    // Every dimension must match dim 0 so one strided view covers the diagonal.
    for (int64_t d = 1; d < ndim; d++) {
      if (self.size(d) != height) {
        AT_ERROR("all dimensions of input must be of equal length");
      }
    }
  }

  // The diagonal is a 1-D strided view: advancing one diagonal element steps
  // once along every dimension, so its stride is the sum of all strides.
  int64_t diag_stride = 0;
  for (int64_t d = 0; d < ndim; d++) {
    diag_stride += self.stride(d);
  }
  const int64_t diag_len = std::min(height, width);
  const std::vector<int64_t> view_sizes{diag_len};
  const std::vector<int64_t> view_strides{diag_stride};
  const int64_t base_offset = self.storage_offset();
  self.as_strided(view_sizes, view_strides, base_offset).fill_(fill_value);

  // Wrapped continuation for tall 2-D matrices: the diagonal repeats every
  // (width + 1) elements of the flattened storage.
  if (wrap && ndim == 2 && height > width + 1) {
    const int64_t step = width + 1;
    const int64_t wrap_len = ((self.numel() + step - 1) / step) - diag_len;
    const std::vector<int64_t> wrap_sizes{wrap_len};
    const int64_t wrap_offset = base_offset + self.stride(0) * step;
    self.as_strided(wrap_sizes, view_strides, wrap_offset).fill_(fill_value);
  }
  return self;
}
// Zero out a dense CPU tensor with a single memset over `nelements` elements.
// Falls back to fill_(0) when the tensor has no backing data pointer.
Tensor& zero_cpu_(Tensor &self, int64_t nelements) {
  void* data = self.data_ptr();
  if (data == nullptr) {
    return self.fill_(0);
  }
  const int64_t nbytes = nelements * self.dtype().itemsize();
  if (nbytes > 0) {
    std::memset(data, 0, nbytes);
  }
  return self;
}
// Zero `self` in place. Small, dense, non-overlapping CPU tensors use the
// memset fast path in zero_cpu_; everything else goes through generic fill_.
Tensor& zero_(Tensor &self) {
  const int64_t nelements = c10::multiply_integers(self.sizes());
  const bool use_memset_path = self.device() == at::kCPU &&
      self.is_non_overlapping_and_dense() &&
      nelements < internal::GRAIN_SIZE;
  if (use_memset_path) {
    return zero_cpu_(self, nelements);
  }
  return self.fill_(0);
}
// Meta-backend overload: meta tensors carry no data, so zeroing is a no-op.
Tensor& zero_meta_(Tensor& self) {
return self;
}
} // namespace native
} // namespace at