# Mirror of https://github.com/Shawn-Shan/fawkes.git
# Synced 2024-09-20 07:26:37 +05:30
# 373 lines, 13 KiB, Python
|
import json
|
||
|
import os
|
||
|
import pickle
|
||
|
import random
|
||
|
|
||
|
import keras
|
||
|
import keras.backend as K
|
||
|
import numpy as np
|
||
|
import tensorflow as tf
|
||
|
from keras.applications.vgg16 import preprocess_input
|
||
|
from keras.layers import Dense, Activation
|
||
|
from keras.models import Model
|
||
|
from keras.preprocessing import image
|
||
|
from keras.utils import to_categorical
|
||
|
from sklearn.metrics import pairwise_distances
|
||
|
|
||
|
|
||
|
def clip_img(X, preprocessing='raw'):
    """Clamp image data to the [0, 255] range in raw pixel space.

    The input is first mapped back with ``reverse_preprocess``, clipped,
    and then converted again with ``preprocess`` using the same scheme.

    Args:
        X: Image data expressed in the ``preprocessing`` representation.
        preprocessing: Name of the preprocessing scheme applied to ``X``.

    Returns:
        The clipped data, expressed in the ``preprocessing`` representation.
    """
    raw_pixels = reverse_preprocess(X, preprocessing)
    clipped = np.clip(raw_pixels, 0.0, 255.0)
    return preprocess(clipped, preprocessing)
|
||
|
|
||
|
|
||
|
def dump_dictionary_as_json(dict, outfile):
    """Write ``dict`` to ``outfile`` as encoded JSON text.

    Args:
        dict: JSON-serializable object to dump (the name shadows the builtin
            but is kept because it is part of the call signature).
        outfile: Path of the file to create or overwrite.
    """
    # Serialize before opening so a serialization error does not truncate
    # an existing file.
    payload = json.dumps(dict).encode()
    with open(outfile, "wb") as handle:
        handle.write(payload)
|
||
|
|
||
|
|
||
|
def fix_gpu_memory(mem_fraction=1):
    """Create a TensorFlow session with GPU memory options and hand it to Keras.

    Args:
        mem_fraction: Value passed as ``per_process_gpu_memory_fraction``.

    Returns:
        The newly created ``tf.Session``, which is also registered through
        ``K.set_session``.
    """
    # Set before the session is created; intended to quiet TensorFlow's
    # native logging.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=mem_fraction))
    config.gpu_options.allow_growth = True
    config.log_device_placement = False

    # The initializer op is built before the session that runs it.
    initializer = tf.global_variables_initializer()
    session = tf.Session(config=config)
    session.run(initializer)
    K.set_session(session)
    return session
|
||
|
|
||
|
|
||
|
def load_victim_model(number_classes, teacher_model=None, end2end=False):
    """Build a softmax classifier on top of ``teacher_model``.

    A ``Dense(number_classes)`` layer followed by a softmax activation named
    ``"act"`` is attached to the output of the teacher's last layer, and the
    resulting model is compiled with categorical cross-entropy and Adadelta.

    Args:
        number_classes: Number of output classes for the new dense layer.
        teacher_model: Keras model used as the base. Required despite the
            ``None`` default, which is kept for signature compatibility.
        end2end: Value assigned to ``trainable`` on every teacher layer.

    Returns:
        The compiled Keras ``Model``.

    Raises:
        ValueError: If ``teacher_model`` is ``None``.
    """
    if teacher_model is None:
        # Fail with a clear message instead of an AttributeError on
        # ``None.layers`` below.
        raise ValueError("teacher_model must be provided to build the victim model")

    for layer in teacher_model.layers:
        layer.trainable = end2end

    features = teacher_model.layers[-1].output
    logits = Dense(number_classes)(features)
    probabilities = Activation('softmax', name="act")(logits)

    model = Model(teacher_model.input, probabilities)
    opt = keras.optimizers.Adadelta()
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model
|
||
|
|
||
|
|
||
|
def init_gpu(gpu_index, force=False):
    """Select the visible GPU(s) and create a session via ``fix_gpu_memory``.

    Args:
        gpu_index: A single GPU index, or a list/tuple of indices that is
            joined with commas for ``CUDA_VISIBLE_DEVICES``.
        force: When true, overwrite an already-set ``CUDA_VISIBLE_DEVICES``.

    Returns:
        The session returned by ``fix_gpu_memory()``, or ``None`` when
        ``CUDA_VISIBLE_DEVICES`` is already set to a non-empty value and
        ``force`` is false.
    """
    # Tuples are accepted as well as lists; str() on a tuple would produce
    # "(0, 1)", which is not a usable device list.
    if isinstance(gpu_index, (list, tuple)):
        gpu_num = ','.join(str(i) for i in gpu_index)
    else:
        gpu_num = str(gpu_index)

    if os.environ.get("CUDA_VISIBLE_DEVICES") and not force:
        print('GPU already initiated')
        return None

    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_num
    return fix_gpu_memory()
|
||
|
|
||
|
|
||
|
def preprocess(X, method):
    """Apply the named preprocessing scheme to ``X``.

    Args:
        X: Image data in raw pixel space.
        method: One of ``'raw'``, ``'imagenet'``, ``'inception'``, ``'mnist'``.
            Only ``'raw'`` (no-op) and ``'imagenet'`` are implemented.

    Returns:
        ``X`` unchanged for ``'raw'``; the result of
        ``imagenet_preprocessing(X)`` for ``'imagenet'``.

    Raises:
        AssertionError: If ``method`` is not one of the four known names.
        Exception: If ``method`` is a known name without an implementation.
    """
    assert method in {'raw', 'imagenet', 'inception', 'mnist'}

    # Compare by value: ``is`` on string literals only works by accident of
    # interning and fails for strings built at runtime.
    if method == 'raw':
        return X
    if method == 'imagenet':
        return imagenet_preprocessing(X)
    raise Exception('unknown method %s' % method)
|
||
|
|
||
|
|
||
|
def reverse_preprocess(X, method):
    """Undo the named preprocessing scheme on ``X``.

    Args:
        X: Image data in the ``method`` representation.
        method: One of ``'raw'``, ``'imagenet'``, ``'inception'``, ``'mnist'``.
            Only ``'raw'`` (no-op) and ``'imagenet'`` are implemented.

    Returns:
        ``X`` unchanged for ``'raw'``; the result of
        ``imagenet_reverse_preprocessing(X)`` for ``'imagenet'``.

    Raises:
        AssertionError: If ``method`` is not one of the four known names.
        Exception: If ``method`` is a known name without an implementation.
    """
    assert method in {'raw', 'imagenet', 'inception', 'mnist'}

    # Compare by value: ``is`` on string literals only works by accident of
    # interning and fails for strings built at runtime.
    if method == 'raw':
        return X
    if method == 'imagenet':
        return imagenet_reverse_preprocessing(X)
    raise Exception('unknown method %s' % method)
|
||
|
|
||
|
|
||
|
def imagenet_preprocessing(x, data_format=None):
    """Reverse the channel order and subtract fixed per-channel means.

    The channel axis is reversed ('RGB' -> 'BGR') and then 103.939, 116.779
    and 123.68 are subtracted from channels 0, 1 and 2 of the reversed data.
    The input itself is never modified; the work is done on a copy.

    Args:
        x: Array-like image data. With ``'channels_first'`` the channel axis
            is 0 for 3-D input and 1 otherwise; with ``'channels_last'`` it
            is the last axis.
        data_format: ``'channels_last'`` or ``'channels_first'``. When
            ``None``, ``K.image_data_format()`` is used.

    Returns:
        A floating-point array with the same shape as ``x``. Non-floating
        input is promoted to float32 so the in-place subtraction is valid.

    Raises:
        AssertionError: If ``data_format`` is not a recognized value.
    """
    if data_format is None:
        data_format = K.image_data_format()
    assert data_format in ('channels_last', 'channels_first')

    x = np.array(x)  # always a copy, so the caller's data is left untouched
    if not np.issubdtype(x.dtype, np.floating):
        # Integer pixels cannot take an in-place float subtraction.
        x = x.astype(np.float32)

    if data_format == 'channels_first':
        channel_axis = 0 if x.ndim == 3 else 1
    else:
        channel_axis = x.ndim - 1

    # 'RGB'->'BGR'
    x = np.flip(x, axis=channel_axis)

    # Zero-center by mean pixel. ``channels`` is a view onto ``x`` with the
    # channel axis moved last, so the in-place updates land in ``x``.
    channels = np.moveaxis(x, channel_axis, -1)
    for channel, channel_mean in enumerate((103.939, 116.779, 123.68)):
        channels[..., channel] -= channel_mean

    return x
|
||
|
|
||
|
|
||
|
def imagenet_reverse_preprocessing(x, data_format=None):
    """Add back the fixed per-channel means and reverse the channel order.

    103.939, 116.779 and 123.68 are added to channels 0, 1 and 2, and then
    the channel axis is reversed ('BGR' -> 'RGB'). The input itself is never
    modified; the work is done on a copy.

    Args:
        x: Array-like image data. With ``'channels_first'`` the channel axis
            is 0 for 3-D input and 1 otherwise; with ``'channels_last'`` it
            is the last axis.
        data_format: ``'channels_last'`` or ``'channels_first'``. When
            ``None``, ``K.image_data_format()`` is used.

    Returns:
        A floating-point array with the same shape as ``x``. Non-floating
        input is promoted to float32 so the in-place addition is valid.

    Raises:
        AssertionError: If ``data_format`` is not a recognized value.
    """
    x = np.array(x)  # always a copy, so the caller's data is left untouched
    if not np.issubdtype(x.dtype, np.floating):
        # Integer data cannot take an in-place float addition.
        x = x.astype(np.float32)

    if data_format is None:
        data_format = K.image_data_format()
    assert data_format in ('channels_last', 'channels_first')

    if data_format == 'channels_first':
        channel_axis = 0 if x.ndim == 3 else 1
    else:
        channel_axis = x.ndim - 1

    # Undo the zero-centering. ``channels`` is a view onto ``x`` with the
    # channel axis moved last, so the in-place updates land in ``x``.
    channels = np.moveaxis(x, channel_axis, -1)
    for channel, channel_mean in enumerate((103.939, 116.779, 123.68)):
        channels[..., channel] += channel_mean

    # 'BGR'->'RGB'
    return np.flip(x, axis=channel_axis)
|
||
|
|
||
|
|
||
|
def build_bottleneck_model(model, cut_off):
    """Create a compiled sub-model that ends at the layer named ``cut_off``.

    Args:
        model: Source Keras model.
        cut_off: Name passed to ``model.get_layer`` to pick the output layer.

    Returns:
        A compiled ``Model`` mapping ``model.input`` to the selected layer's
        output.
    """
    model_input = model.input
    cut_output = model.get_layer(cut_off).output
    bottleneck_model = Model(model_input, cut_output)

    compile_options = {
        'loss': 'categorical_crossentropy',
        'optimizer': 'adam',
        'metrics': ['accuracy'],
    }
    bottleneck_model.compile(**compile_options)
    return bottleneck_model
|
||
|
|
||
|
|
||
|
def load_extractor(name):
    """Load a feature extractor from ``../feature_extractors/<name>.h5``.

    Args:
        name: Base name of the ``.h5`` model file, without the extension.

    Returns:
        The loaded Keras model.

    Raises:
        Exception: If the model's last layer uses a softmax activation, which
            means the classification head was not removed.
    """
    model = keras.models.load_model("../feature_extractors/{}.h5".format(name))

    # A layer's ``activation`` is normally a callable, so comparing it to the
    # string "softmax" never matches; compare by name and still accept a
    # plain string.
    activation = getattr(model.layers[-1], "activation", None)
    if isinstance(activation, str):
        activation_name = activation
    else:
        activation_name = getattr(activation, "__name__", None)

    if activation_name == "softmax":
        raise Exception(
            "Given extractor's last layer is softmax, need to remove the top layers to make it into a feature extractor")
    return model
|
||
|
|
||
|
|
||
|
def get_dataset_path(dataset):
    """Look up the directories and sizes configured for ``dataset``.

    The information is read from ``config.json`` in the current working
    directory.

    Args:
        dataset: Key of the dataset entry in ``config.json``.

    Returns:
        A tuple ``(train_dir, test_dir, num_classes, num_images)`` taken from
        the dataset's config entry.

    Raises:
        Exception: If ``config.json`` does not exist or has no entry for
            ``dataset``.
    """
    if not os.path.exists("config.json"):
        raise Exception("Please config the datasets before running protection code. See more in README and config.py.")

    # Use a context manager so the file handle is closed deterministically.
    with open("config.json", 'r') as config_file:
        config = json.load(config_file)

    if dataset not in config:
        raise Exception(
            "Dataset {} does not exist, please download to data/ and add the path to this function... Abort".format(
                dataset))

    entry = config[dataset]
    return entry['train_dir'], entry['test_dir'], entry['num_classes'], entry['num_images']
|
||
|
|
||
|
|
||
|
def normalize(x):
    """Scale each row of ``x`` by its norm.

    Args:
        x: Two-dimensional array-like; norms are taken along axis 1.

    Returns:
        ``x`` divided row-wise by ``np.linalg.norm(x, axis=1, keepdims=True)``.
    """
    row_norms = np.linalg.norm(x, axis=1, keepdims=True)
    return x / row_norms
|
||
|
|
||
|
|
||
|
class CloakData(object):
    """Image data and path bookkeeping for one protected class of a dataset.

    On construction the dataset's directories are resolved through
    ``get_dataset_path``, a protected label and a different "sybil" label are
    chosen, their train/test images are loaded, and lookup tables from image
    paths to class indices are built.
    """

    def __init__(self, dataset, img_shape=(224, 224), protect_class=None):
        """Load the protected and sybil classes of ``dataset``.

        Args:
            dataset: Dataset key understood by ``get_dataset_path``.
            img_shape: Target size passed to ``image.load_img``.
            protect_class: Label (sub-directory name) to protect. When falsy,
                a label is picked at random.
        """
        self.dataset = dataset
        self.img_shape = img_shape

        self.train_data_dir, self.test_data_dir, self.number_classes, self.number_samples = get_dataset_path(dataset)
        # Sorted so that the label -> index assignment in build_data_mapping
        # does not depend on directory listing order.
        self.all_labels = sorted(os.listdir(self.train_data_dir))
        if protect_class:
            self.protect_class = protect_class
        else:
            self.protect_class = random.choice(self.all_labels)

        self.sybil_class = random.choice([label for label in self.all_labels if label != self.protect_class])
        self.protect_train_X, self.protect_test_X = self.load_label_data(self.protect_class)
        self.sybil_train_X, self.sybil_test_X = self.load_label_data(self.sybil_class)

        # Filled in by the caller once cloaked images have been produced.
        self.cloaked_protect_train_X = None
        self.cloaked_sybil_train_X = None

        self.label2path_train, self.label2path_test, self.path2idx = self.build_data_mapping()
        self.all_training_path = self.get_all_data_path(self.label2path_train)
        self.all_test_path = self.get_all_data_path(self.label2path_test)
        self.protect_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.protect_class))
        self.sybil_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.sybil_class))

        print("Find {} protect images".format(len(self.protect_class_path)))

    def get_class_image_files(self, path):
        """Return the entries of directory ``path``, each joined onto ``path``."""
        return [os.path.join(path, f) for f in os.listdir(path)]

    def extractor_ls_predict(self, feature_extractors_ls, X):
        """Concatenate and row-normalize the predictions of several extractors.

        Args:
            feature_extractors_ls: Objects exposing ``predict(X)``.
            X: Input batch handed to every extractor.

        Returns:
            The predictions concatenated along axis 1 and passed through
            ``normalize``.
        """
        feature_ls = [extractor.predict(X) for extractor in feature_extractors_ls]
        return normalize(np.concatenate(feature_ls, axis=1))

    def load_embeddings(self, feature_extractors_names):
        """Load and merge the pickled embedding dictionaries of the extractors.

        One dictionary per name is read from
        ``../feature_extractors/embeddings/<name>_emb_norm.p``.

        Args:
            feature_extractors_names: Extractor names selecting the pickles.

        Returns:
            A dict with the keys of the first dictionary; each value is the
            concatenation of that key's embeddings across all dictionaries.
        """
        dictionaries = []
        for extractor_name in feature_extractors_names:
            emb_path = "../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name)
            # NOTE(review): pickle can execute arbitrary code while loading;
            # only use embedding files from a trusted source.
            with open(emb_path, "rb") as emb_file:
                dictionaries.append(pickle.load(emb_file))

        merge_dict = {}
        for k in dictionaries[0]:
            merge_dict[k] = np.concatenate([dic[k] for dic in dictionaries])
        return merge_dict

    def select_target_label(self, feature_extractors_ls, feature_extractors_names, metric='l2'):
        """Pick a target class whose features are far from the protected class.

        The five embedding entries with the largest minimum distance to the
        protected training features are evaluated with
        ``calculate_dist_score``; the best-scoring one is returned.

        Args:
            feature_extractors_ls: Extractor models used for prediction.
            feature_extractors_names: Names used to load stored embeddings.
            metric: Distance metric passed to ``pairwise_distances``.

        Returns:
            A tuple ``(target_class_path, paired_target_X)`` where
            ``paired_target_X`` holds the selected target images in shuffled
            order.

        Raises:
            ValueError: If no candidate achieves a score above zero.
        """
        original_feature_x = self.extractor_ls_predict(feature_extractors_ls, self.protect_train_X)

        path2emb = self.load_embeddings(feature_extractors_names)
        items = list(path2emb.items())
        paths = [item[0] for item in items]
        embs = np.array([item[1] for item in items])

        pair_dist = pairwise_distances(original_feature_x, embs, metric=metric)
        # For every candidate: distance to its closest protected sample.
        min_dist = np.min(pair_dist, axis=0)
        sorted_idx = np.argsort(min_dist)[::-1]

        highest_num = 0
        paired_target_X = None
        final_target_class_path = None
        for idx in sorted_idx[:5]:
            target_class_path = paths[idx]
            cur_target_X = self.load_dir(target_class_path)
            # The candidate images are repeated three times before pairing.
            cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X])
            cur_tot_sum, cur_paired_target_X = self.calculate_dist_score(self.protect_train_X, cur_target_X,
                                                                         feature_extractors_ls,
                                                                         metric=metric)
            if cur_tot_sum > highest_num:
                highest_num = cur_tot_sum
                paired_target_X = cur_paired_target_X
                final_target_class_path = target_class_path

        if paired_target_X is None:
            # Previously this fell through to np.random.shuffle(None).
            raise ValueError("No target class with a positive distance score was found")

        np.random.shuffle(paired_target_X)
        return final_target_class_path, paired_target_X

    def calculate_dist_score(self, a, b, feature_extractors_ls, metric='l2'):
        """Score candidate images ``b`` against images ``a`` in feature space.

        For every item of ``b`` the distance to its closest item of ``a`` is
        computed; the ``len(a)`` items of ``b`` with the largest such distance
        are kept.

        Args:
            a: Reference images.
            b: Candidate images.
            feature_extractors_ls: Extractor models used for prediction.
            metric: Distance metric passed to ``pairwise_distances``.

        Returns:
            A tuple ``(score, paired_target_X)``: the smallest kept distance
            and an array of the kept items of ``b``.
        """
        features1 = self.extractor_ls_predict(feature_extractors_ls, a)
        features2 = self.extractor_ls_predict(feature_extractors_ls, b)

        pair_dist = pairwise_distances(features1, features2, metric=metric)
        min_dist = np.min(pair_dist, axis=0)
        farthest_idx = np.argsort(min_dist)[::-1][:len(a)]
        kept_dist = [min_dist[i] for i in farthest_idx]
        paired_target_X = np.array([b[j] for j in farthest_idx])
        return np.min(kept_dist), paired_target_X

    def get_all_data_path(self, label2path):
        """Flatten a ``{directory: [file names]}`` mapping into full paths."""
        all_paths = []
        for directory, file_names in label2path.items():
            all_paths.extend(os.path.join(directory, file_name) for file_name in file_names)
        return all_paths

    def load_label_data(self, label):
        """Load the train and test images of ``label``.

        Returns:
            A tuple ``(train_X, test_X)`` as produced by ``load_dir``.
        """
        train_label_path = os.path.join(self.train_data_dir, label)
        test_label_path = os.path.join(self.test_data_dir, label)
        return self.load_dir(train_label_path), self.load_dir(test_label_path)

    def load_dir(self, path):
        """Load every file in ``path`` as an image resized to ``img_shape``.

        Returns:
            The stacked images after ``preprocess_input``.

        Raises:
            AssertionError: If ``path`` does not exist.
        """
        assert os.path.exists(path)
        x_ls = []
        for file_name in os.listdir(path):
            im = image.load_img(os.path.join(path, file_name), target_size=self.img_shape)
            x_ls.append(image.img_to_array(im))
        return preprocess_input(np.array(x_ls))

    def build_data_mapping(self):
        """Index the train and test directories of every label.

        Returns:
            A tuple ``(label2path_train, label2path_test, path2idx)``. The
            first two map a label directory to its file names; the third maps
            each full image path to the position of its label in
            ``all_labels``.
        """
        label2path_train = {}
        label2path_test = {}
        path2idx = {}
        for idx, label_name in enumerate(self.all_labels):
            full_path_train = os.path.join(self.train_data_dir, label_name)
            full_path_test = os.path.join(self.test_data_dir, label_name)
            # List each directory once and reuse the result for both tables.
            train_files = os.listdir(full_path_train)
            test_files = os.listdir(full_path_test)
            label2path_train[full_path_train] = train_files
            label2path_test[full_path_test] = test_files
            for img_file in train_files:
                path2idx[os.path.join(full_path_train, img_file)] = idx
            for img_file in test_files:
                path2idx[os.path.join(full_path_test, img_file)] = idx
        return label2path_train, label2path_test, path2idx

    def generate_data_post_cloak(self, sybil=False, batch_size=32):
        """Yield training batches in which protected images are cloaked.

        Each batch samples ``batch_size`` distinct training paths. A path of
        the protected class is replaced by a random cloaked protected image;
        when ``sybil`` is true, a path of the sybil class is replaced by a
        random cloaked sybil image; any other path is loaded from disk.

        Args:
            sybil: Whether sybil-class images are replaced as well.
            batch_size: Number of paths sampled per batch.

        Yields:
            Tuples ``(batch_X, batch_Y)`` where ``batch_Y`` is one-hot encoded
            over ``number_classes``.

        Raises:
            AssertionError: If ``cloaked_protect_train_X`` has not been set.
            ValueError: If ``sybil`` is true but ``cloaked_sybil_train_X`` has
                not been set.
        """
        assert self.cloaked_protect_train_X is not None
        if sybil and self.cloaked_sybil_train_X is None:
            raise ValueError("cloaked_sybil_train_X must be set when sybil=True")

        # Sets give constant-time membership tests inside the sampling loop.
        protect_paths = set(self.protect_class_path)
        # Match against the sybil image paths; testing ``p in self.sybil_class``
        # was a substring test against the label name.
        sybil_paths = set(self.sybil_class_path)

        while True:
            batch_X = []
            batch_Y = []
            for p in random.sample(self.all_training_path, batch_size):
                cur_y = self.path2idx[p]
                if p in protect_paths:
                    cur_x = random.choice(self.cloaked_protect_train_X)
                elif sybil and p in sybil_paths:
                    cur_x = random.choice(self.cloaked_sybil_train_X)
                else:
                    im = image.load_img(p, target_size=self.img_shape)
                    cur_x = preprocess_input(image.img_to_array(im))
                batch_X.append(cur_x)
                batch_Y.append(cur_y)
            batch_X = np.array(batch_X)
            batch_Y = to_categorical(np.array(batch_Y), num_classes=self.number_classes)
            yield batch_X, batch_Y
|