-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfast_gradient_sign_untargeted.py
120 lines (84 loc) · 3.5 KB
/
fast_gradient_sign_untargeted.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
"""
this code is modified from https://github.com/utkuozbulak/pytorch-cnn-adversarial-attacks
original author: Utku Ozbulak - github.com/utkuozbulak
"""
import sys
sys.path.append("..")
import os
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from utils import tensor2cuda
KL_WEIGHT=0.01
def project(x, original_x, epsilon, _type='linf'):
    """Project `x` back into the epsilon-ball centered at `original_x`.

    Args:
        x: perturbed tensor, same shape as `original_x`. The 'l2' branch
           assumes a 4-D batch (N, C, H, W) — the mask is unsqueezed to
           rank 4 before broadcasting.
        original_x: unperturbed reference tensor.
        epsilon: radius of the allowed perturbation ball.
        _type: ball norm, 'linf' or 'l2'.

    Returns:
        Tensor of the same shape as `x`: element-wise clipped ('linf') or
        per-sample rescaled ('l2') so ||x - original_x|| <= epsilon.

    Raises:
        NotImplementedError: for any other `_type`.
    """
    if _type == 'linf':
        # Element-wise clip into [original_x - eps, original_x + eps].
        max_x = original_x + epsilon
        min_x = original_x - epsilon
        x = torch.max(torch.min(x, max_x), min_x)
    elif _type == 'l2':
        dist = (x - original_x)
        dist = dist.view(x.shape[0], -1)
        dist_norm = torch.norm(dist, dim=1, keepdim=True)
        # Per-sample mask: True where the perturbation leaves the l2 ball.
        mask = (dist_norm > epsilon).unsqueeze(2).unsqueeze(3)
        # Floor the norm to avoid 0/0 when x == original_x. The original
        # code divided by a possibly-zero norm; the resulting NaN survived
        # the masked blend below because NaN * 0 is still NaN.
        dist = dist / dist_norm.clamp(min=1e-12)
        dist *= epsilon
        dist = dist.view(x.shape)
        # Rescale only out-of-ball samples; in-ball samples pass through.
        x = (original_x + dist) * mask.float() + x * (1 - mask.float())
    else:
        raise NotImplementedError
    return x
class FastGradientSignUntargeted():
    """
    Fast gradient sign untargeted adversarial attack: maximizes the loss of
    the true class with iterative grad-sign (PGD-style) updates.

    NOTE: the original declared this docstring as a bytes literal
    (b\"\"\"...\"\"\"), which Python does not register as a docstring —
    ``__doc__`` was silently None. Fixed to a plain string.
    """
    def __init__(self, model, epsilon, alpha, min_val, max_val, max_iters, _type='linf'):
        """
        Args:
            model: network; called as model(x, _eval=True) and expected to
                return (logits, kl_loss) — see perturb().
            epsilon: maximum perturbation radius.
            alpha: step size (movement multiplier) per iteration.
            min_val: minimum valid pixel value.
            max_val: maximum valid pixel value.
            max_iters: number of attack iterations.
            _type: perturbation-ball norm, 'linf' or 'l2'.
        """
        self.model = model
        # self.model.eval()
        # Maximum perturbation
        self.epsilon = epsilon
        # Movement multiplier per iteration
        self.alpha = alpha
        # Minimum value of the pixels
        self.min_val = min_val
        # Maximum value of the pixels
        self.max_val = max_val
        # Maximum numbers of iteration to generated adversaries
        self.max_iters = max_iters
        # The perturbation of epsilon
        self._type = _type
    def perturb(self, original_images, labels, reduction4loss='mean', random_start=False):
        """Generate adversarial examples for a batch.

        Args:
            original_images: clean inputs; values within [min_val, max_val].
            labels: ground-truth labels for the untargeted loss.
            reduction4loss: reduction passed to F.cross_entropy; with
                'none', a ones grad_outputs is used for the vector loss.
            random_start: start from a uniform random point inside the
                epsilon-ball instead of the clean image.

        Returns:
            Adversarial tensor, projected into the epsilon-ball and clamped
            to the valid pixel range. The model is left in train mode
            (restored unconditionally — it does not remember prior mode).
        """
        # The adversaries created from random close points to the original data
        if random_start:
            rand_perturb = torch.FloatTensor(original_images.shape).uniform_(
                -self.epsilon, self.epsilon)
            rand_perturb = tensor2cuda(rand_perturb)
            x = original_images + rand_perturb
            x.clamp_(self.min_val, self.max_val)
        else:
            x = original_images.clone()
        x.requires_grad = True
        # Switch to eval so dropout/batchnorm do not randomize the gradients.
        self.model.eval()
        with torch.enable_grad():
            for _iter in range(self.max_iters):
                # Model is assumed to return (logits, kl_loss); kl_loss is
                # folded into the attack objective with weight KL_WEIGHT.
                outputs, kl_loss = self.model(x, _eval=True)
                loss = F.cross_entropy(outputs, labels, reduction=reduction4loss) + kl_loss * KL_WEIGHT
                if reduction4loss == 'none':
                    # Vector-valued loss needs explicit grad_outputs.
                    grad_outputs = tensor2cuda(torch.ones(loss.shape))
                else:
                    grad_outputs = None
                grads = torch.autograd.grad(loss, x, grad_outputs=grad_outputs,
                        only_inputs=True)[0]
                # Ascent step on the sign of the gradient (untargeted: we
                # maximize the loss of the true label).
                x.data += self.alpha * torch.sign(grads.data)
                # the adversaries' pixel value should within max_x and min_x due
                # to the l_infinity / l2 restriction
                x = project(x, original_images, self.epsilon, self._type)
                # the adversaries' value should be valid pixel value
                x.clamp_(self.min_val, self.max_val)
        self.model.train()
        return x