-
Notifications
You must be signed in to change notification settings - Fork 97
/
Copy pathspsl_detector.py
148 lines (126 loc) · 5.77 KB
/
spsl_detector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
'''
# author: Zhiyuan Yan
# email: [email protected]
# date: 2023-07-06
# description: Class for the SPSLDetector
Functions in the Class are summarized as:
1. __init__: Initialization
2. build_backbone: Backbone-building
3. build_loss: Loss-function-building
4. features: Feature-extraction
5. classifier: Classification
6. get_losses: Loss-computation
7. get_train_metrics: Training-metrics-computation
8. get_test_metrics: Testing-metrics-computation
9. forward: Forward-propagation
Reference:
@inproceedings{liu2021spatial,
title={Spatial-phase shallow learning: rethinking face forgery detection in frequency domain},
author={Liu, Honggu and Li, Xiaodan and Zhou, Wenbo and Chen, Yuefeng and He, Yuan and Xue, Hui and Zhang, Weiming and Yu, Nenghai},
booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition},
pages={772--781},
year={2021}
}
Notes:
To ensure consistency in the comparison with other detectors, we have opted not to utilize the shallow Xception architecture. Instead, we are employing the original Xception model.
'''
import os
import datetime
import logging
import numpy as np
from sklearn import metrics
from typing import Union
from collections import defaultdict
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn import DataParallel
from torch.utils.tensorboard import SummaryWriter
from metrics.base_metrics_class import calculate_metrics_for_train
from .base_detector import AbstractDetector
from detectors import DETECTOR
from networks import BACKBONE
from loss import LOSSFUNC
import random
logger = logging.getLogger(__name__)
@DETECTOR.register_module(module_name='spsl')
class SpslDetector(AbstractDetector):
    """SPSL deepfake detector (Liu et al., CVPR 2021).

    Concatenates the RGB image with its phase-only spectral
    reconstruction (4 channels total) and classifies with an Xception
    backbone whose first convolution is widened to accept 4 input
    channels.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        self.backbone = self.build_backbone(config)
        self.loss_func = self.build_loss(config)

    def build_backbone(self, config):
        """Build the Xception backbone and load ImageNet-pretrained weights.

        Adapts the pretrained 3-channel ``conv1`` to the 4-channel input
        (RGB + phase) by averaging the RGB filters and replicating the
        average across all 4 input channels.

        Args:
            config: dict with keys 'backbone_name', 'backbone_config'
                and 'pretrained' (path to the checkpoint).

        Returns:
            The backbone module with pretrained weights loaded.
        """
        # prepare the backbone
        backbone_class = BACKBONE[config['backbone_name']]
        model_config = config['backbone_config']
        backbone = backbone_class(model_config)
        # To get a good performance, use the ImageNet-pretrained Xception model.
        # map_location='cpu' keeps loading safe on CPU-only hosts;
        # load_state_dict copies tensors to the parameters' device anyway.
        state_dict = torch.load(config['pretrained'], map_location='cpu')
        # Pretrained pointwise convs are stored as 2-D (out, in); reshape
        # them to the (out, in, 1, 1) layout nn.Conv2d expects.
        for name, weights in state_dict.items():
            if 'pointwise' in name:
                state_dict[name] = weights.unsqueeze(-1).unsqueeze(-1)
        # Drop the pretrained classifier head; it is trained from scratch.
        state_dict = {k: v for k, v in state_dict.items() if 'fc' not in k}
        # Remove conv1 from state_dict; its shape no longer matches the
        # 4-channel replacement and is handled explicitly below.
        conv1_data = state_dict.pop('conv1.weight')
        backbone.load_state_dict(state_dict, strict=False)
        logger.info('Load pretrained model from {}'.format(config['pretrained']))
        # Replace conv1 with a 4-input-channel version and reuse the old
        # parameters so the activation statistics stay balanced.
        backbone.conv1 = nn.Conv2d(4, 32, 3, 2, 0, bias=False)
        avg_conv1_data = conv1_data.mean(dim=1, keepdim=True)  # average across the RGB channels
        backbone.conv1.weight.data = avg_conv1_data.repeat(1, 4, 1, 1)  # repeat the averaged weights across the 4 new channels
        logger.info('Copy conv1 from pretrained model')
        return backbone

    def build_loss(self, config):
        """Instantiate the loss function named by ``config['loss_func']``."""
        loss_class = LOSSFUNC[config['loss_func']]
        loss_func = loss_class()
        return loss_func

    def features(self, data_dict: dict, phase_fea: torch.Tensor) -> torch.Tensor:
        """Extract backbone features from the RGB image concatenated
        (channel-wise) with the phase reconstruction."""
        features = torch.cat((data_dict['image'], phase_fea), dim=1)
        return self.backbone.features(features)

    def classifier(self, features: torch.Tensor) -> torch.Tensor:
        """Map backbone features to class logits."""
        return self.backbone.classifier(features)

    def get_losses(self, data_dict: dict, pred_dict: dict) -> dict:
        """Compute the classification loss.

        Returns:
            dict with key 'overall' holding the scalar loss tensor.
        """
        label = data_dict['label']
        pred = pred_dict['cls']
        loss = self.loss_func(pred, label)
        loss_dict = {'overall': loss}
        return loss_dict

    def get_train_metrics(self, data_dict: dict, pred_dict: dict) -> dict:
        """Compute batch-level training metrics (acc/auc/eer/ap)."""
        label = data_dict['label']
        pred = pred_dict['cls']
        # compute metrics for batch data
        auc, eer, acc, ap = calculate_metrics_for_train(label.detach(), pred.detach())
        metric_batch_dict = {'acc': acc, 'auc': auc, 'eer': eer, 'ap': ap}
        # we dont compute the video-level metrics for training
        self.video_names = []
        return metric_batch_dict

    def forward(self, data_dict: dict, inference=False) -> dict:
        """Run a full forward pass.

        Args:
            data_dict: must contain 'image' (N, 3, H, W float tensor —
                TODO confirm channel count against the data loader).
            inference: kept for interface compatibility; unused here.

        Returns:
            dict with 'cls' (logits), 'prob' (P(fake), i.e. softmax of
            class index 1) and 'feat' (backbone features).
        """
        # get the phase features
        phase_fea = self.phase_without_amplitude(data_dict['image'])
        # bp
        features = self.features(data_dict, phase_fea)
        # get the prediction by classifier
        pred = self.classifier(features)
        # get the probability of the pred
        prob = torch.softmax(pred, dim=1)[:, 1]
        # build the prediction dict for each output
        pred_dict = {'cls': pred, 'prob': prob, 'feat': features}
        return pred_dict

    def phase_without_amplitude(self, img):
        """Reconstruct a phase-only signal from ``img``.

        Converts to grayscale, takes the 2-D DFT over the spatial axes,
        keeps only the phase (unit magnitude), and inverse-transforms
        back to the spatial domain.

        Args:
            img: (N, C, H, W) tensor; channels are averaged to grayscale.

        Returns:
            (N, 1, H, W) real tensor of the phase-only reconstruction.
        """
        # Convert to grayscale by averaging over the channel dim
        gray_img = torch.mean(img, dim=1, keepdim=True)
        # Compute the DFT over the two spatial dimensions
        X = torch.fft.fftn(gray_img, dim=(-1, -2))
        # Extract the phase information from the DFT
        phase_spectrum = torch.angle(X)
        # Unit-magnitude complex spectrum carrying only the phase
        reconstructed_X = torch.exp(1j * phase_spectrum)
        # IDFT back to the spatial domain; imaginary part is numerical noise
        reconstructed_x = torch.real(torch.fft.ifftn(reconstructed_X, dim=(-1, -2)))
        return reconstructed_x