2
0
mirror of https://github.com/Shawn-Shan/fawkes.git synced 2024-12-22 07:09:33 +05:30

refactor code

This commit is contained in:
Shawn-Shan 2020-05-18 15:35:14 -05:00
parent eeffa82598
commit de558e841e
6 changed files with 144 additions and 840 deletions

0
fawkes/__init__.py Normal file
View File

View File

@ -9,7 +9,7 @@ import time
from decimal import Decimal from decimal import Decimal
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
from .utils import preprocess, reverse_preprocess from utils import preprocess, reverse_preprocess
class FawkesMaskGeneration: class FawkesMaskGeneration:
@ -391,7 +391,7 @@ class FawkesMaskGeneration:
if iteration != 0 and iteration % (self.MAX_ITERATIONS // 3) == 0: if iteration != 0 and iteration % (self.MAX_ITERATIONS // 3) == 0:
LR = LR / 2 LR = LR / 2
print("Learning Rate: ", LR) print("Learning Rate: ", LR)
# print out the losses every 10%
if iteration % (self.MAX_ITERATIONS // 10) == 0: if iteration % (self.MAX_ITERATIONS // 10) == 0:
if self.verbose == 1: if self.verbose == 1:
loss_sum = float(self.sess.run(self.loss_sum)) loss_sum = float(self.sess.run(self.loss_sum))

View File

@ -1,617 +0,0 @@
import sys
sys.path.append("/home/shansixioing/tools/")
import gen_utils
import keras, os
from keras.preprocessing import image
from skimage.transform import resize
from sklearn.model_selection import train_test_split
from keras.models import Model
from keras.layers import Input
from keras.layers import Conv2D, MaxPooling2D, Dense, Activation, Layer
import keras.backend as K
import random, pickle
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import preprocess_input
from sklearn.metrics import pairwise_distances
from keras.utils import to_categorical
def load_dataset_deepid(full=False, num_classes=1283, preprocess='raw'):
    """Load the DeepID training and test arrays from their HDF5 files.

    When ``full`` is false only training shard 0 is read; otherwise training
    shards 0-3 are read in order and concatenated.  Both splits are then run
    through ``utils_keras.preprocess`` using the ``preprocess`` mode.

    ``num_classes`` is accepted but not used by this function.

    NOTE(review): ``utils_keras`` is not among the imports visible at the top
    of this file -- confirm where it is expected to come from.

    Returns:
        ``(X_train, Y_train, X_test, Y_test)``.
    """
    shard_files = [
        "/mnt/data/sixiongshan/backdoor/data/deepid/deepid_data_training_0.h5",
        "/mnt/data/sixiongshan/backdoor/data/deepid/deepid_data_training_1.h5",
        "/mnt/data/sixiongshan/backdoor/data/deepid/deepid_data_training_2.h5",
        "/mnt/data/sixiongshan/backdoor/data/deepid/deepid_data_training_3.h5",
    ]
    if full:
        shards = [gen_utils.load_h5py(["X_train", "Y_train"], path) for path in shard_files]
        X_train = np.concatenate([shard[0] for shard in shards])
        Y_train = np.concatenate([shard[1] for shard in shards])
    else:
        X_train, Y_train = gen_utils.load_h5py(["X_train", "Y_train"], shard_files[0])

    X_test, Y_test = gen_utils.load_h5py(["X_test", "Y_test"],
                                         "/mnt/data/sixiongshan/backdoor/data/deepid/deepid_data_testing.h5")

    X_train = utils_keras.preprocess(X_train, preprocess)
    X_test = utils_keras.preprocess(X_test, preprocess)
    return X_train, Y_train, X_test, Y_test
def load_dataset(data_file):
    """Load ``data_file`` via ``utils_keras.load_dataset`` and unpack its splits.

    Returns:
        ``(X_train, Y_train, X_test, Y_test)`` read from the keys of the same
        names in the loaded dataset.
    """
    loaded = utils_keras.load_dataset(data_file)
    return loaded['X_train'], loaded['Y_train'], loaded['X_test'], loaded['Y_test']
def load_extractor(name, all_layers=False):
    """Load a saved feature-extractor model by name.

    Args:
        name: Extractor name used to build the ``{name}_extract.h5`` file name.
            ``None`` means "no extractor" and yields ``None``.
        all_layers: When true and ``name`` is ``'vggface1'``, return a model
            that outputs the activations of several intermediate layers
            instead of the loaded model itself.

    Returns:
        ``None`` if ``name`` is ``None``; the multi-output model for
        ``all_layers`` with ``'vggface1'``; otherwise the loaded model.
    """
    if name is None:
        return None

    m = keras.models.load_model("/home/shansixioing/cloak/models/extractors/{}_extract.h5".format(name))

    if all_layers and name == 'vggface1':
        target_layers = ['conv4_3', 'conv5_1', 'conv5_2', 'conv5_3', 'flatten', 'fc6', 'fc7']
        # Previously this model was built and then dropped, so ``all_layers``
        # had no effect on the returned value.
        return Model(inputs=m.layers[0].input,
                     outputs=[m.get_layer(l).output for l in target_layers])

    return m
def transfer_learning_model(teacher_model, number_classes):
    """Build a classifier on top of a frozen ``teacher_model``.

    Every layer of ``teacher_model`` is marked non-trainable, then a ``Dense``
    layer with ``number_classes`` units and a softmax activation named
    ``"act"`` are attached to the output of its last layer.  The result is
    compiled with categorical cross-entropy, Adadelta and an accuracy metric.
    """
    for frozen in teacher_model.layers:
        frozen.trainable = False

    logits = Dense(number_classes)(teacher_model.layers[-1].output)
    probabilities = Activation('softmax', name="act")(logits)

    student = Model(teacher_model.input, probabilities)
    student.compile(loss='categorical_crossentropy',
                    optimizer=keras.optimizers.Adadelta(),
                    metrics=['accuracy'])
    return student
def clip_img(X, preprocessing='raw'):
    """Clip ``X`` to the [0, 255] range in un-preprocessed space.

    ``X`` is mapped back with ``utils_keras.reverse_preprocess``, clipped to
    ``[0.0, 255.0]`` and preprocessed again with the same ``preprocessing``
    mode.
    """
    raw_pixels = utils_keras.reverse_preprocess(X, preprocessing)
    clipped = np.clip(raw_pixels, 0.0, 255.0)
    return utils_keras.preprocess(clipped, preprocessing)
def get_dataset_path(dataset):
    """Return the directories and sizes registered for a named dataset.

    Args:
        dataset: One of ``"webface"``, ``"vggface1"``, ``"vggface2"``,
            ``"scrub"``, ``"youtubeface"``, ``"emily"``, ``"pubfig"`` or
            ``"iris"``.

    Returns:
        ``(train_data_dir, test_data_dir, number_classes, number_samples)``.

    Raises:
        SystemExit: with code 1, after printing a message, when ``dataset``
            is not a known name.
    """
    # name -> (train dir, test dir, number of classes, number of samples)
    known_datasets = {
        "webface": ('/mnt/data/sixiongshan/data/webface/train',
                    '/mnt/data/sixiongshan/data/webface/test',
                    10575, 475137),
        "vggface1": ('/mnt/data/sixiongshan/data/vggface/train',
                     '/mnt/data/sixiongshan/data/vggface/test',
                     2622, 1716436 // 3),
        "vggface2": ('/mnt/data/sixiongshan/data/vggface2/train',
                     '/mnt/data/sixiongshan/data/vggface2/test',
                     8631, 3141890 // 3),
        "scrub": ('/mnt/data/sixiongshan/data/facescrub/keras_flow_dir/train',
                  '/mnt/data/sixiongshan/data/facescrub/keras_flow_dir/test',
                  530, 57838),
        "youtubeface": ('/mnt/data/sixiongshan/data/youtubeface/keras_flow_data/train_mtcnnpy_224',
                        '/mnt/data/sixiongshan/data/youtubeface/keras_flow_data/test_mtcnnpy_224',
                        1283, 587137 // 5),
        "emily": ('/mnt/data/sixiongshan/data/emface/train',
                  '/mnt/data/sixiongshan/data/emface/test',
                  66, 6070),
        "pubfig": ('/mnt/data/sixiongshan/data/pubfig/train',
                   '/mnt/data/sixiongshan/data/pubfig/test',
                   65, 5979),
        "iris": ('/mnt/data/sixiongshan/data/iris/train',
                 '/mnt/data/sixiongshan/data/iris/test',
                 1000, 14000),
    }

    try:
        train_data_dir, test_data_dir, number_classes, number_samples = known_datasets[dataset]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments, which the old if/elif chain
        # also treated as "unknown dataset".
        print("Dataset {} does not exist... Abort".format(dataset))
        # sys.exit instead of the bare exit(): the latter is injected by the
        # ``site`` module for interactive use and is not always available.
        sys.exit(1)

    return train_data_dir, test_data_dir, number_classes, number_samples
def large_dataset_loader(dataset, augmentation=False, test_only=False, image_size=(224, 224)):
    """Create directory-backed generators for a named dataset.

    The dataset name is resolved with ``get_dataset_path`` and the resulting
    directories are handed to ``generator_wrap`` together with the
    ``augmentation``, ``test_only`` and ``image_size`` options.

    Returns:
        ``(train_generator, test_generator, number_classes, number_samples)``.
    """
    train_dir, test_dir, number_classes, number_samples = get_dataset_path(dataset)
    generators = generator_wrap(train_data_dir=train_dir,
                                test_data_dir=test_dir,
                                augmentation=augmentation,
                                test_only=test_only,
                                image_size=image_size)
    train_generator, test_generator = generators
    return train_generator, test_generator, number_classes, number_samples
def sample_from_generator(gen, nb_sample):
    """Draw the first ``nb_sample`` samples produced by a batch generator.

    Args:
        gen: Batch source yielding ``(x, y)`` array pairs.  Either an object
            with a ``next()`` method or any Python iterator is accepted.
        nb_sample: Number of samples to return.

    Returns:
        ``(X_sample, Y_sample)`` truncated to ``nb_sample`` rows.

    Note:
        The very first batch drawn is used only to learn the per-sample
        shapes and is not part of the returned sample.
    """
    # Prefer the ``next()`` method when present; fall back to the iterator
    # protocol so plain generators work too.
    draw = gen.next if hasattr(gen, "next") else gen.__next__

    probe_x, probe_y = draw()
    # Zero-length float seeds keep the result dtype/shape well defined even
    # when nb_sample is 0.
    x_chunks = [np.zeros((0,) + tuple(probe_x.shape[1:]))]
    y_chunks = [np.zeros((0,) + tuple(probe_y.shape[1:]))]

    collected = 0
    while collected < nb_sample:
        x, y = draw()
        x_chunks.append(x)
        y_chunks.append(y)
        collected += x.shape[0]

    # Concatenate once instead of re-copying the accumulated array per batch.
    X_sample = np.concatenate(x_chunks, axis=0)[:nb_sample]
    Y_sample = np.concatenate(y_chunks, axis=0)[:nb_sample]
    return X_sample, Y_sample
def generator_wrap(train_data_dir=None, test_data_dir=None, augmentation=False, test_only=False, image_size=(224, 224)):
    """Build train/test ``flow_from_directory`` generators.

    Without a test directory, 5% of the training directory is held out and
    served as the test generator (``subset='validation'``).  With a test
    directory, each generator reads its own directory; ``test_only`` skips
    building the training generator and returns ``None`` in its place.

    ``augmentation`` switches on random rotation, shifts, zoom and horizontal
    flips.  Images always go through ``preprocess_input`` and are batched in
    groups of 32 at ``image_size``.

    Returns:
        ``(train_generator, test_generator)``.
    """
    validation_split = 0 if test_data_dir else 0.05

    if augmentation:
        data_gen = ImageDataGenerator(
            preprocessing_function=preprocess_input,
            rotation_range=20,
            width_shift_range=0.15,
            height_shift_range=0.15,
            shear_range=0.,
            zoom_range=0.15,
            channel_shift_range=0.,
            fill_mode='nearest',
            cval=0.,
            horizontal_flip=True, validation_split=validation_split)
    else:
        data_gen = ImageDataGenerator(preprocessing_function=preprocess_input, validation_split=validation_split)

    def flow(directory, **extra):
        # Shared settings for every directory iterator created below.
        return data_gen.flow_from_directory(directory, target_size=image_size, batch_size=32, **extra)

    if test_data_dir is None:
        train_generator = flow(train_data_dir, subset='training')
        test_generator = flow(train_data_dir, subset='validation')
        return train_generator, test_generator

    train_generator = None if test_only else flow(train_data_dir)
    test_generator = flow(test_data_dir)
    return train_generator, test_generator
class MergeLayer(Layer):
    """Fuse two feature maps into a single 160-dimensional vector.

    The layer takes a list of two tensors, flattens them to ``5 * 4 * 60`` and
    ``4 * 3 * 80`` features respectively, projects each with its own kernel
    and adds a shared bias.
    """

    def __init__(self, **kwargs):
        # Last tensor produced by ``call``; kept as an attribute because
        # earlier code exposed it.
        self.result = None
        super(MergeLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the two projection kernels and the bias."""
        kernel_1_shape = (5 * 4 * 60, 160)
        kernel_2_shape = (4 * 3 * 80, 160)
        bias_shape = (160,)

        self.kernel_1 = self.add_weight(name='kernel_1',
                                        shape=kernel_1_shape,
                                        initializer='uniform',
                                        trainable=True)
        self.kernel_2 = self.add_weight(name='kernel_2',
                                        shape=kernel_2_shape,
                                        initializer='uniform',
                                        trainable=True)
        self.bias = self.add_weight(name='bias',
                                    shape=bias_shape,
                                    initializer='uniform',
                                    trainable=True)
        super(MergeLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        """Project both inputs to 160 units and sum them with the bias."""
        layer1 = x[0]
        layer2 = x[1]
        layer1_r = K.reshape(layer1, (-1, 5 * 4 * 60))
        layer2_r = K.reshape(layer2, (-1, 4 * 3 * 80))
        self.result = K.dot(layer1_r, self.kernel_1) + \
                      K.dot(layer2_r, self.kernel_2) + self.bias
        return self.result

    def compute_output_shape(self, input_shape):
        """Return ``(batch, 160)``.

        Derived from ``input_shape`` rather than from ``self.result`` so the
        method also works before ``call`` has run.
        NOTE(review): assumes ``input_shape`` is a sequence of two shape
        tuples whose first entry is the batch dimension -- confirm.
        """
        return (input_shape[0][0], 160)
def load_deepid_model(class_num):
    """Assemble the DeepID-style classifier for ``(55, 47, 3)`` inputs.

    Three conv/ReLU/max-pool stages are followed by a fourth conv/ReLU stage;
    the outputs of stage three and stage four are fused by ``MergeLayer``,
    passed through ReLU and classified by a ``class_num``-way softmax.
    The returned model is not compiled.
    """
    def conv_relu(tensor, filters, kernel, name):
        conv = Conv2D(filters, kernel, strides=(1, 1), padding='valid', name=name)(tensor)
        return Activation('relu')(conv)

    def pool(tensor, name):
        return MaxPooling2D((2, 2), strides=(2, 2), name=name)(tensor)

    img_input = Input(shape=(55, 47, 3))

    stage1 = pool(conv_relu(img_input, 20, (4, 4), 'conv_1'), 'pool_1')
    stage2 = pool(conv_relu(stage1, 40, (3, 3), 'conv_2'), 'pool_2')
    stage3 = pool(conv_relu(stage2, 60, (3, 3), 'conv_3'), 'pool_3')
    stage4 = conv_relu(stage3, 80, (2, 2), 'conv_4')

    merged = MergeLayer()([stage3, stage4])
    merged = Activation('relu')(merged)
    scores = Dense(class_num, name='fc')(merged)
    probabilities = Activation('softmax')(scores)

    return Model(img_input, probabilities, name='vgg_face')
def get_label_data(X, Y, target):
    """Keep only the samples whose one-hot label in ``Y`` is ``target``.

    Returns:
        ``(X_filter, Y_filter)`` as new NumPy arrays.
    """
    keep = np.argmax(Y, axis=1) == target
    return np.array(X)[keep], np.array(Y)[keep]
def get_other_label_data(X, Y, target):
    """Drop the samples whose one-hot label in ``Y`` is ``target``.

    Returns:
        ``(X_filter, Y_filter)`` as new NumPy arrays holding every other class.
    """
    keep = np.argmax(Y, axis=1) != target
    return np.array(X)[keep], np.array(Y)[keep]
def get_labels_data(X, Y, target_ls):
    """Keep only the samples whose label is one of ``target_ls``.

    ``target_ls`` must be a ``list`` of class indices; ``Y`` is one-hot.

    Returns:
        ``(X_filter, Y_filter)`` as new NumPy arrays.
    """
    assert isinstance(target_ls, list)
    labels = np.argmax(Y, axis=1)
    keep = np.isin(labels, target_ls)
    return np.array(X)[keep], np.array(Y)[keep]
def get_other_labels_data_except(X, Y, target_ls):
    """Drop every sample whose label is one of ``target_ls``.

    ``target_ls`` must be a ``list`` of class indices; ``Y`` is one-hot.

    Returns:
        ``(X_filter, Y_filter)`` as new NumPy arrays.
    """
    assert isinstance(target_ls, list)
    labels = np.argmax(Y, axis=1)
    keep = ~np.isin(labels, target_ls)
    return np.array(X)[keep], np.array(Y)[keep]
def get_bottom_top_model(model, layer_name):
    """Split ``model`` into two models at the layer called ``layer_name``.

    Layers are replayed in ``model.layers`` order: everything up to and
    including ``layer_name`` is applied to a fresh input shaped like the
    model's input, everything after it to a fresh input shaped like that
    layer's output.

    Returns:
        ``(bottom_model, top_model)``.
    """
    split_layer = model.get_layer(layer_name)

    bottom_input = Input(model.input_shape[1:])
    top_input = Input(split_layer.output_shape[1:])
    bottom_output, top_output = bottom_input, top_input

    in_bottom = True
    for current in model.layers:
        if in_bottom:
            bottom_output = current(bottom_output)
        else:
            top_output = current(top_output)
        # Switch halves only after the split layer itself has been applied.
        if current.name == layer_name:
            in_bottom = False

    bottom_model = Model(bottom_input, bottom_output)
    top_model = Model(top_input, top_output)
    return bottom_model, top_model
def load_end2end_model(arch, number_classes):
    """Build a fully trainable classifier on an ImageNet-pretrained backbone.

    Args:
        arch: Backbone selector: ``'resnet'`` (ResNet152V2), ``'inception'``
            (InceptionResNetV2), ``'mobile'`` (MobileNetV2) or ``'dense'``
            (DenseNet121).
        number_classes: Size of the softmax head added by
            ``load_victim_model``.

    Returns:
        The compiled model returned by ``load_victim_model`` with
        ``end2end=True``.

    Raises:
        ValueError: if ``arch`` is not one of the supported selectors
            (previously this surfaced as an ``UnboundLocalError``).
    """
    backbone_kwargs = dict(include_top=False, weights='imagenet', pooling='avg',
                           input_shape=(224, 224, 3))

    if arch == 'resnet':
        MODEL = keras.applications.resnet_v2.ResNet152V2(**backbone_kwargs)
    elif arch == 'inception':
        MODEL = keras.applications.InceptionResNetV2(**backbone_kwargs)
    elif arch == 'mobile':
        MODEL = keras.applications.mobilenet_v2.MobileNetV2(**backbone_kwargs)
    elif arch == 'dense':
        MODEL = keras.applications.densenet.DenseNet121(**backbone_kwargs)
    else:
        raise ValueError(
            "Unknown architecture {!r}; expected 'resnet', 'inception', 'mobile' or 'dense'".format(arch))

    model = load_victim_model(number_classes, MODEL, end2end=True)
    return model
def load_victim_model(number_classes, teacher_model=None, end2end=False):
    """Attach a softmax classification head to ``teacher_model``.

    Args:
        number_classes: Number of units in the new ``Dense`` layer.
        teacher_model: Model providing the features.  Required despite the
            ``None`` default, which is kept only for signature compatibility.
        end2end: Value assigned to ``trainable`` on every teacher layer;
            false freezes the teacher, true fine-tunes it.

    Returns:
        A model compiled with categorical cross-entropy, Adadelta and an
        accuracy metric.

    Raises:
        ValueError: if ``teacher_model`` is ``None``.
    """
    if teacher_model is None:
        # Fail with a clear message instead of an AttributeError on ``.layers``.
        raise ValueError("teacher_model is required")

    for l in teacher_model.layers:
        l.trainable = end2end

    x = teacher_model.layers[-1].output
    x = Dense(number_classes)(x)
    x = Activation('softmax', name="act")(x)

    model = Model(teacher_model.input, x)
    opt = keras.optimizers.Adadelta()
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model
def add_last_layer(number_classes, teacher_model, cut_to_layer=None):
    """Freeze ``teacher_model`` and add a new softmax head to it.

    Args:
        number_classes: Number of units in the new ``Dense`` layer (named
            ``'softmax'``).
        teacher_model: Model whose layers are all marked non-trainable.
        cut_to_layer: Optional index into ``teacher_model.layers``; the head
            is attached to that layer's output and the layer name is printed.
            ``None`` attaches to the last layer.

    Returns:
        A model compiled with categorical cross-entropy, Adadelta and an
        accuracy metric.
    """
    for l in teacher_model.layers:
        l.trainable = False

    # ``is not None`` so that index 0 is honoured instead of being treated
    # as "no cut requested".
    if cut_to_layer is not None:
        x = teacher_model.layers[cut_to_layer].output
        print(teacher_model.layers[cut_to_layer].name)
    else:
        x = teacher_model.layers[-1].output

    x = Dense(number_classes, name='softmax')(x)
    x = Activation('softmax', name="act")(x)

    model = Model(teacher_model.input, x)
    opt = keras.optimizers.Adadelta()
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model
def resize_batch(x, target_size=(224, 224), intensity="imagenet"):
    """Resize every image in batch ``x`` to ``target_size``.

    Args:
        x: Batch of preprocessed images.  The function iterates over the
            first axis, so axes 1 and 2 are taken to be height and width.
        target_size: ``(height, width)`` to resize to.
        intensity: Preprocessing mode handed to ``utils_keras`` to undo and
            re-apply around the resize.

    Returns:
        ``x`` itself when it already has the requested spatial size,
        otherwise a new preprocessed array of resized images.

    NOTE(review): ``utils_keras`` is not among the imports visible at the top
    of this file -- confirm where it is expected to come from.
    """
    # Compare the spatial axes; the previous check looked at
    # (batch, height) and so almost never short-circuited.
    if tuple(x.shape[1:3]) == tuple(target_size):
        return x

    x = utils_keras.reverse_preprocess(x, intensity)
    resized = np.array([resize(a, target_size) for a in x])
    return utils_keras.preprocess(resized, intensity)
def build_bottleneck_model(model, cut_off):
    """Return ``model`` truncated at the layer named ``cut_off``.

    The truncated model maps the original input to that layer's output and is
    compiled with categorical cross-entropy, the ``'adam'`` optimizer and an
    accuracy metric.
    """
    cut_output = model.get_layer(cut_off).output
    truncated = Model(model.input, cut_output)
    truncated.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return truncated
def split_dataset(X, y, ratio=0.3):
    """Split ``X``/``y`` with ``train_test_split`` using a fixed seed of 0.

    ``ratio`` is passed as ``test_size``.

    Returns:
        ``(x_appro, x_later, y_appro, y_later)`` in the order produced by
        ``train_test_split``.
    """
    parts = train_test_split(X, y, test_size=ratio, random_state=0)
    x_appro, x_later, y_appro, y_later = parts
    return x_appro, x_later, y_appro, y_later
def data_generator(X, Y, batch_size=32, target_size=(224, 224), intensity='imagenet'):
    """Yield ``(images, labels)`` batches forever, resized to ``target_size``.

    Batches come from ``ImageDataGenerator().flow`` over ``X`` and ``Y``; each
    image batch is passed through ``resize_batch`` with the given
    ``intensity`` mode before being yielded as a NumPy array.
    """
    batches = ImageDataGenerator().flow(X, Y, batch_size=batch_size)
    while True:
        images, labels = next(batches)
        images = resize_batch(images, target_size=target_size, intensity=intensity)
        yield np.array(images), labels
def evaluate(model, X_test, Y_test, batch_size=32, target_size=(224, 224)):
    """Estimate the accuracy of ``model`` on ``(X_test, Y_test)``.

    Batches are produced by ``data_generator`` and roughly half of the data
    (``len(X_test) // batch_size // 2`` steps, at least one) is evaluated.
    When fewer than two full batches are available the batch size drops to 1.

    Returns:
        The second value reported by ``model.evaluate_generator`` (the first
        compiled metric).
    """
    # Settle the batch size *before* building the generator so that the
    # generator and the step count agree on it.
    if len(X_test) < batch_size * 2:
        batch_size = 1

    test_other_gen = data_generator(X_test, Y_test, batch_size=batch_size, target_size=target_size)

    # Never ask for zero steps (happens for a single sample).
    test_other_step = max(1, len(X_test) // batch_size // 2)
    acc = model.evaluate_generator(test_other_gen, steps=test_other_step, verbose=0)[1]
    return acc
def normalize(x):
    """Scale each row of the 2-D array ``x`` to unit Euclidean norm.

    Rows with zero norm are divided by zero and are not specially handled.
    """
    row_norms = np.linalg.norm(x, axis=1, keepdims=True)
    return x / row_norms
class CloakData(object):
    """Bookkeeping for one cloaking experiment on a directory-based dataset.

    On construction a "protect" class and a "sybil" class are chosen, their
    images are loaded, and mappings from image paths to class indices are
    built.  ``cloaked_protect_train_X`` and ``cloaked_sybil_train_X`` start as
    ``None`` and are expected to be filled in by the caller before
    ``generate_data_post_cloak`` is used.
    """

    def __init__(self, dataset, img_shape=(224, 224), target_selection_tries=30, protect_class=None):
        """Resolve ``dataset``, pick the protect/sybil classes and load them.

        Args:
            dataset: Name understood by ``get_dataset_path``.
            img_shape: Size images are loaded at.
            target_selection_tries: Stored on the instance; not used by the
                methods of this class.
            protect_class: Label (directory name) to protect; a random label
                is chosen when falsy.
        """
        self.dataset = dataset
        self.img_shape = img_shape
        self.target_selection_tries = target_selection_tries

        self.train_data_dir, self.test_data_dir, self.number_classes, self.number_samples = get_dataset_path(dataset)
        self.all_labels = sorted(list(os.listdir(self.train_data_dir)))
        if protect_class:
            self.protect_class = protect_class
        else:
            self.protect_class = random.choice(self.all_labels)

        self.sybil_class = random.choice([l for l in self.all_labels if l != self.protect_class])
        print("Protect label: {} | Sybil label: {}".format(self.protect_class, self.sybil_class))
        self.protect_train_X, self.protect_test_X = self.load_label_data(self.protect_class)
        self.sybil_train_X, self.sybil_test_X = self.load_label_data(self.sybil_class)

        self.cloaked_protect_train_X = None
        self.cloaked_sybil_train_X = None

        self.label2path_train, self.label2path_test, self.path2idx = self.build_data_mapping()
        self.all_training_path = self.get_all_data_path(self.label2path_train)
        self.all_test_path = self.get_all_data_path(self.label2path_test)
        self.protect_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.protect_class))
        self.sybil_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.sybil_class))

        print(
            "Find {} protect images | {} sybil images".format(len(self.protect_class_path), len(self.sybil_class_path)))

    def get_class_image_files(self, path):
        """Return the full path of every entry in directory ``path``."""
        return [os.path.join(path, f) for f in os.listdir(path)]

    def extractor_ls_predict(self, feature_extractors_ls, X):
        """Concatenate the predictions of all extractors and row-normalize."""
        feature_ls = [extractor.predict(X) for extractor in feature_extractors_ls]
        concated_feature_ls = np.concatenate(feature_ls, axis=1)
        return normalize(concated_feature_ls)

    def load_embeddings(self, feature_extractors_names):
        """Load the pickled embedding dictionary of each named extractor.

        Returns:
            A dict with the keys of the first dictionary, each mapped to the
            concatenation of that key's embeddings across all dictionaries.
        """
        dictionaries = []
        for extractor_name in feature_extractors_names:
            emb_path = "/home/shansixioing/cloak/embs/{}_emb_norm.p".format(extractor_name)
            # NOTE: unpickling can execute arbitrary code; these files must
            # come from a trusted source.
            with open(emb_path, "rb") as emb_file:
                dictionaries.append(pickle.load(emb_file))

        merge_dict = {}
        for k in dictionaries[0]:
            merge_dict[k] = np.concatenate([dic[k] for dic in dictionaries])
        return merge_dict

    def select_target_label(self, feature_extractors_ls, feature_extractors_names, metric='l2'):
        """Choose a target class far from the protected images in feature space.

        Candidate classes are the keys of the loaded embeddings, ranked by
        their smallest L2 distance to any protected training image (largest
        first).  The two top-ranked candidates are loaded from disk and
        scored with ``calculate_dist_score`` using ``metric``; the better one
        wins.

        Returns:
            ``(final_target_class_path, paired_target_X)`` where
            ``paired_target_X`` has been shuffled in place.
        """
        original_feature_x = self.extractor_ls_predict(feature_extractors_ls, self.protect_train_X)

        path2emb = self.load_embeddings(feature_extractors_names)
        items = list(path2emb.items())
        paths = [p[0] for p in items]
        embs = np.array([p[1] for p in items])

        # The ranking distance is always L2; ``metric`` only affects scoring.
        pair_dist = pairwise_distances(original_feature_x, embs, 'l2')
        closest_dist = np.min(pair_dist, axis=0)
        sorted_idx = np.argsort(closest_dist)[::-1]

        # Start below any possible score so a candidate scoring exactly 0 is
        # still selected instead of leaving the result unset.
        highest_num = float("-inf")
        paired_target_X = None
        final_target_class_path = None
        for idx in sorted_idx[:2]:
            target_class_path = paths[idx]
            cur_target_X = self.load_dir(target_class_path)
            cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X])

            cur_tot_sum, cur_paired_target_X = self.calculate_dist_score(self.protect_train_X, cur_target_X,
                                                                         feature_extractors_ls,
                                                                         metric=metric)
            if cur_tot_sum > highest_num:
                highest_num = cur_tot_sum
                paired_target_X = cur_paired_target_X
                final_target_class_path = target_class_path

        np.random.shuffle(paired_target_X)
        return final_target_class_path, paired_target_X

    def calculate_dist_score(self, a, b, feature_extractors_ls, metric='l2'):
        """Score candidate images ``b`` against source images ``a``.

        For every image in ``b`` the smallest feature distance to any image
        in ``a`` is computed; the ``len(a)`` images of ``b`` with the largest
        such distance are kept.

        Returns:
            ``(score, paired_target_X)`` where ``score`` is the smallest of
            the kept distances and ``paired_target_X`` the kept images.
        """
        features1 = self.extractor_ls_predict(feature_extractors_ls, a)
        features2 = self.extractor_ls_predict(feature_extractors_ls, b)

        pair_dist = pairwise_distances(features1, features2, metric)
        closest_dist = np.min(pair_dist, axis=0)
        keep_idx = np.argsort(closest_dist)[::-1][:len(a)]
        kept_dist = [closest_dist[i] for i in keep_idx]
        paired_target_X = np.array([b[j] for j in keep_idx])
        return np.min(kept_dist), paired_target_X

    def get_all_data_path(self, label2path):
        """Flatten a ``{directory: [file names]}`` mapping into full paths."""
        all_paths = []
        for k, v in label2path.items():
            all_paths.extend(os.path.join(k, cur_p) for cur_p in v)
        return all_paths

    def load_label_data(self, label):
        """Load the train and test images of class ``label``.

        Returns:
            ``(train_X, test_X)``.
        """
        train_label_path = os.path.join(self.train_data_dir, label)
        test_label_path = os.path.join(self.test_data_dir, label)
        train_X = self.load_dir(train_label_path)
        test_X = self.load_dir(test_label_path)
        return train_X, test_X

    def load_dir(self, path):
        """Load every file in ``path`` as an image and preprocess the batch."""
        assert os.path.exists(path)
        x_ls = []
        for file in os.listdir(path):
            cur_path = os.path.join(path, file)
            im = image.load_img(cur_path, target_size=self.img_shape)
            im = image.img_to_array(im)
            x_ls.append(im)
        raw_x = np.array(x_ls)
        return preprocess_input(raw_x)

    def build_data_mapping(self):
        """Index the train and test directories of every label.

        Returns:
            ``(label2path_train, label2path_test, path2idx)``: the first two
            map a class directory to its file names, the last maps every full
            image path to the class index (position in ``self.all_labels``).
        """
        label2path_train = {}
        label2path_test = {}
        path2idx = {}
        for idx, label_name in enumerate(self.all_labels):
            full_path_train = os.path.join(self.train_data_dir, label_name)
            full_path_test = os.path.join(self.test_data_dir, label_name)
            train_files = list(os.listdir(full_path_train))
            test_files = list(os.listdir(full_path_test))
            label2path_train[full_path_train] = train_files
            label2path_test[full_path_test] = test_files
            for img_file in train_files:
                path2idx[os.path.join(full_path_train, img_file)] = idx
            for img_file in test_files:
                path2idx[os.path.join(full_path_test, img_file)] = idx
        return label2path_train, label2path_test, path2idx

    def generate_data_post_cloak(self, sybil=False):
        """Yield training batches of 32 with cloaked images swapped in.

        Paths are sampled from ``self.all_training_path``.  Images of the
        protected class are replaced by a random cloaked protect image; when
        ``sybil`` is true, images of the sybil class are replaced by a random
        cloaked sybil image.  All other images are loaded from disk.

        Yields:
            ``(batch_X, batch_Y)`` with ``batch_Y`` one-hot encoded over
            ``self.number_classes``.
        """
        assert self.cloaked_protect_train_X is not None
        if sybil:
            assert self.cloaked_sybil_train_X is not None

        # Sets give constant-time membership tests inside the batch loop.
        protect_paths = set(self.protect_class_path)
        # Match against the sybil image paths; testing ``p in self.sybil_class``
        # was a substring check against the label name and never matched.
        sybil_paths = set(self.sybil_class_path)

        while True:
            batch_X = []
            batch_Y = []
            cur_batch_path = random.sample(self.all_training_path, 32)
            for p in cur_batch_path:
                cur_y = self.path2idx[p]
                if p in protect_paths:
                    cur_x = random.choice(self.cloaked_protect_train_X)
                elif sybil and (p in sybil_paths):
                    cur_x = random.choice(self.cloaked_sybil_train_X)
                else:
                    im = image.load_img(p, target_size=self.img_shape)
                    im = image.img_to_array(im)
                    cur_x = preprocess_input(im)
                batch_X.append(cur_x)
                batch_Y.append(cur_y)
            batch_X = np.array(batch_X)
            batch_Y = to_categorical(np.array(batch_Y), num_classes=self.number_classes)
            yield batch_X, batch_Y

View File

@ -4,30 +4,23 @@ sys.path.append("/home/shansixioing/tools/")
sys.path.append("/home/shansixioing/cloak/") sys.path.append("/home/shansixioing/cloak/")
import argparse import argparse
import gen_utils
from tensorflow import set_random_seed from tensorflow import set_random_seed
from encode_utils import * from utils import init_gpu, load_extractor, load_victim_model, dump_dictionary_as_json
import os
import numpy as np
import random import random
import pickle import pickle
import re import re
import locale from keras.preprocessing import image
from keras.utils import to_categorical
from keras.applications.vgg16 import preprocess_input
loc = locale.getlocale() # import locale
locale.setlocale(locale.LC_ALL, loc) #
# loc = locale.getlocale()
# locale.setlocale(locale.LC_ALL, loc)
SEEDS = [12345, 23451, 34512, 45123, 51234, 54321, 43215, 32154, 21543, 15432] SEEDS = [12345, 23451, 34512, 45123, 51234, 54321, 43215, 32154, 21543, 15432]
IMG_SHAPE = [224, 224, 3]
MODEL = {
'vggface1_inception': "0",
'vggface1_dense': "1",
"vggface2_inception": "2",
"vggface2_dense": "3",
"webface_dense": "4",
"webface_inception": "5",
}
RES_DIR = '/home/shansixioing/cloak/results/'
def select_samples(data_dir): def select_samples(data_dir):
@ -40,40 +33,30 @@ def select_samples(data_dir):
return all_data_path return all_data_path
def generator_wrap(cloak_data, n_uncloaked, n_classes, test=False, validation_split=0.1): def generator_wrap(cloak_data, n_classes, test=False, validation_split=0.1):
if test: if test:
# all_data_path = cloak_data.all_test_path
all_data_path = select_samples(cloak_data.test_data_dir) all_data_path = select_samples(cloak_data.test_data_dir)
else: else:
# all_data_path = cloak_data.all_training_path
all_data_path = select_samples(cloak_data.train_data_dir) all_data_path = select_samples(cloak_data.train_data_dir)
split = int(len(cloak_data.cloaked_protect_train_X) * (1 - validation_split)) split = int(len(cloak_data.cloaked_protect_train_X) * (1 - validation_split))
cloaked_train_X = cloak_data.cloaked_protect_train_X[:split] cloaked_train_X = cloak_data.cloaked_protect_train_X[:split]
if cloak_data.cloaked_sybil_train_X is not None:
cloaked_sybil_X = cloak_data.cloaked_sybil_train_X #[:args.number_sybil * 131]
#
# for _ in range(len(cloaked_sybil_X) - 131):
# all_data_path.append(cloak_data.sybil_class_path[0])
# random seed for selecting uncloaked pictures
np.random.seed(12345) np.random.seed(12345)
uncloaked_path = np.random.choice(cloak_data.protect_class_path, n_uncloaked).tolist()
# all_vals = list(cloak_data.path2idx.items())
while True: while True:
batch_X = [] batch_X = []
batch_Y = [] batch_Y = []
cur_batch_path = np.random.choice(all_data_path, args.batch_size) cur_batch_path = np.random.choice(all_data_path, args.batch_size)
for p in cur_batch_path: for p in cur_batch_path:
# p = p.encode("utf-8").decode("ascii", 'ignore')
cur_y = cloak_data.path2idx[p] cur_y = cloak_data.path2idx[p]
# protect class and sybil class do not need to appear in test dataset # protect class and sybil class do not need to appear in test dataset
if test and (re.search(cloak_data.protect_class, p) or re.search(cloak_data.sybil_class, p)): if test and (re.search(cloak_data.protect_class, p)):
continue continue
# protect class images in train dataset # protect class images in train dataset
elif p in cloak_data.protect_class_path and p not in uncloaked_path: elif p in cloak_data.protect_class_path:
cur_x = random.choice(cloaked_train_X) cur_x = random.choice(cloaked_train_X)
# sybil class in train dataset
elif p in cloak_data.sybil_class_path and cloak_data.cloaked_sybil_train_X is not None:
cur_x = random.choice(cloaked_sybil_X)
else: else:
im = image.load_img(p, target_size=cloak_data.img_shape) im = image.load_img(p, target_size=cloak_data.img_shape)
im = image.img_to_array(im) im = image.img_to_array(im)
@ -108,35 +91,33 @@ def main():
SEED = SEEDS[args.seed_idx] SEED = SEEDS[args.seed_idx]
random.seed(SEED) random.seed(SEED)
set_random_seed(SEED) set_random_seed(SEED)
gen_utils.init_gpu(args.gpu) init_gpu(args.gpu)
if args.dataset == 'pubfig': if args.dataset == 'pubfig':
N_CLASSES = 65 N_CLASSES = 65
CLOAK_DIR = "{}_tm{}_tgt57_r1.0_th{}".format(args.dataset, args.model_idx, args.th) CLOAK_DIR = args.cloak_data
elif args.dataset == 'scrub': elif args.dataset == 'scrub':
N_CLASSES = 530 N_CLASSES = 530
CLOAK_DIR = "{}_tm{}_tgtPatrick_Dempsey_r1.0_th{}_joint".format(args.dataset, args.model_idx, args.th) CLOAK_DIR = args.cloak_data
elif args.dataset == 'webface':
N_CLASSES = 10575
CLOAK_DIR = "{}_tm{}_tgt1640351_r1.0_th0.01/".format(args.dataset, args.model_idx)
else: else:
raise ValueError raise ValueError
CLOAK_DIR = CLOAK_DIR + "_th{}_sd{}".format(args.th, int(args.sd))
print(CLOAK_DIR) print(CLOAK_DIR)
CLOAK_DIR = os.path.join(RES_DIR, CLOAK_DIR) CLOAK_DIR = os.path.join("../results", CLOAK_DIR)
RES = pickle.load(open(os.path.join(CLOAK_DIR, "cloak_data.p"), 'rb')) RES = pickle.load(open(os.path.join(CLOAK_DIR, "cloak_data.p"), 'rb'))
print("Build attacker's model") print("Build attacker's model")
cloak_data = RES['cloak_data'] cloak_data = RES['cloak_data']
EVAL_RES = {} EVAL_RES = {}
train_generator = generator_wrap(cloak_data, n_uncloaked=args.n_uncloaked, n_classes=N_CLASSES, train_generator = generator_wrap(cloak_data, n_classes=N_CLASSES,
validation_split=args.validation_split) validation_split=args.validation_split)
test_generator = generator_wrap(cloak_data, test=True, n_uncloaked=args.n_uncloaked, n_classes=N_CLASSES, test_generator = generator_wrap(cloak_data, test=True, n_classes=N_CLASSES,
validation_split=args.validation_split) validation_split=args.validation_split)
EVAL_RES['transfer_model'] = args.transfer_model EVAL_RES['transfer_model'] = args.transfer_model
if args.end2end:
model = load_end2end_model("dense", N_CLASSES)
else:
base_model = load_extractor(args.transfer_model) base_model = load_extractor(args.transfer_model)
model = load_victim_model(teacher_model=base_model, number_classes=N_CLASSES) model = load_victim_model(teacher_model=base_model, number_classes=N_CLASSES)
@ -144,9 +125,12 @@ def main():
cloaked_test_X, cloaked_test_Y = eval_cloaked_test_data(cloak_data, N_CLASSES, cloaked_test_X, cloaked_test_Y = eval_cloaked_test_data(cloak_data, N_CLASSES,
validation_split=args.validation_split) validation_split=args.validation_split)
try:
model.fit_generator(train_generator, steps_per_epoch=cloak_data.number_samples // 32, model.fit_generator(train_generator, steps_per_epoch=cloak_data.number_samples // 32,
validation_data=(original_X, original_Y), epochs=args.n_epochs, verbose=2, validation_data=(original_X, original_Y), epochs=args.n_epochs, verbose=1,
use_multiprocessing=True, workers=3) use_multiprocessing=False, workers=1)
except KeyboardInterrupt:
pass
_, acc_original = model.evaluate(original_X, original_Y, verbose=0) _, acc_original = model.evaluate(original_X, original_Y, verbose=0)
print("Accuracy on uncloaked/original images TEST: {:.4f}".format(acc_original)) print("Accuracy on uncloaked/original images TEST: {:.4f}".format(acc_original))
@ -156,43 +140,34 @@ def main():
print("Accuracy on cloaked images TEST: {:.4f}".format(acc_cloaked)) print("Accuracy on cloaked images TEST: {:.4f}".format(acc_cloaked))
EVAL_RES['acc_cloaked'] = acc_cloaked EVAL_RES['acc_cloaked'] = acc_cloaked
# pred = model.predict_generator(test_generator, verbose=0, steps=10)
# pred = np.argmax(pred, axis=1)
# print(pred)
_, other_acc = model.evaluate_generator(test_generator, verbose=0, steps=50) _, other_acc = model.evaluate_generator(test_generator, verbose=0, steps=50)
print("Accuracy on other classes {:.4f}".format(other_acc)) print("Accuracy on other classes {:.4f}".format(other_acc))
EVAL_RES['other_acc'] = other_acc EVAL_RES['other_acc'] = other_acc
gen_utils.dump_dictionary_as_json(EVAL_RES, dump_dictionary_as_json(EVAL_RES,
os.path.join(CLOAK_DIR, "{}_eval_sybil_uncloaked{}_seed{}_th{}.json".format( os.path.join(CLOAK_DIR, "eval_seed{}_th{}.json".format(args.seed_idx, args.th)))
args.transfer_model, args.end2end, args.seed_idx, args.th)))
def parse_arguments(argv): def parse_arguments(argv):
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--gpu', type=str, parser.add_argument('--gpu', type=str,
help='GPU id', default='1') help='GPU id', default='2')
parser.add_argument('--n_uncloaked', type=int,
help='number of uncloaked images', default=0)
parser.add_argument('--seed_idx', type=int, parser.add_argument('--seed_idx', type=int,
help='random seed index', default=0) help='random seed index', default=0)
parser.add_argument('--dataset', type=str, parser.add_argument('--dataset', type=str,
help='name of dataset', default='pubfig') help='name of dataset', default='scrub')
parser.add_argument('--model_idx', type=str, parser.add_argument('--cloak_data', type=str,
help='teacher model index', default="2") help='name of the cloak result directory',
default='scrub_webface_dense_robust_protectPatrick_Dempsey')
parser.add_argument('--sd', type=int, default=1e6)
parser.add_argument('--th', type=float, default=0.01)
parser.add_argument('--transfer_model', type=str, parser.add_argument('--transfer_model', type=str,
help='student model', default='vggface2_inception') help='student model', default='../feature_extractors/vggface2_inception_extract.h5')
parser.add_argument('--end2end', type=int,
help='whether use end2end', default=0)
parser.add_argument('--batch_size', type=int, default=32) parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--validation_split', type=float, default=0.1) parser.add_argument('--validation_split', type=float, default=0.1)
parser.add_argument('--use_sybil', type=int,
help='whether use sybil class', default=0)
parser.add_argument('--number_sybil', type=int,
help='whether use sybil class', default=1)
parser.add_argument('--n_epochs', type=int, default=3) parser.add_argument('--n_epochs', type=int, default=3)
parser.add_argument('--th', type=float, default=0.01)
parser.add_argument('--limit', type=int, default=0)
return parser.parse_args(argv) return parser.parse_args(argv)

View File

@ -1,56 +1,35 @@
import argparse
import os
import pickle
import random
import sys import sys
sys.path.append("/home/shansixioing/tools/")
sys.path.append("/home/shansixioing/cloak/")
import argparse
from tensorflow import set_random_seed
from .differentiator import FawkesMaskGeneration
import os
import numpy as np import numpy as np
import random from differentiator import FawkesMaskGeneration
import pickle from tensorflow import set_random_seed
from .utils import load_extractor, CloakData, init_gpu from utils import load_extractor, CloakData, init_gpu
#
random.seed(12243) random.seed(12243)
np.random.seed(122412) np.random.seed(122412)
set_random_seed(12242) set_random_seed(12242)
SYBIL_ONLY = False NUM_IMG_PROTECTED = 10 # Number of images used to optimize the target class
BATCH_SIZE = 10
NUM_IMG_PROTECTED = 20 # Number of images used to optimize the target class
BATCH_SIZE = 20
MODEL_IDX = {
'vggface1_inception': "0",
'vggface1_dense': "1",
"vggface2_inception": "2",
"vggface2_dense": "3",
"webface_dense": "4",
"webface_inception": "5",
}
IDX2MODEL = {v: k for k, v in MODEL_IDX.items()}
IMG_SHAPE = [224, 224, 3] IMG_SHAPE = [224, 224, 3]
GLOBAL_MASK = 0 MAX_ITER = 1000
MAXIMIZE = False
MAX_ITER = 500
INITIAL_CONST = 1e6
LR = 0.1
def diff_protected_data(sess, feature_extractors_ls, image_X, number_protect, target_X=None, sybil=False, th=0.01): def diff_protected_data(sess, feature_extractors_ls, image_X, number_protect, target_X=None, th=0.01):
image_X = image_X[:number_protect] image_X = image_X[:number_protect]
differentiator = FawkesMaskGeneration(sess, feature_extractors_ls, differentiator = FawkesMaskGeneration(sess, feature_extractors_ls,
batch_size=BATCH_SIZE, batch_size=BATCH_SIZE,
mimic_img=True, mimic_img=True,
intensity_range='imagenet', intensity_range='imagenet',
initial_const=INITIAL_CONST, initial_const=args.sd,
learning_rate=LR, learning_rate=args.lr,
max_iterations=MAX_ITER, max_iterations=MAX_ITER,
l_threshold=th, l_threshold=th,
verbose=1, maximize=False, keep_final=False, image_shape=image_X.shape[1:]) verbose=1, maximize=False, keep_final=False, image_shape=image_X.shape[1:])
@ -62,61 +41,43 @@ def diff_protected_data(sess, feature_extractors_ls, image_X, number_protect, ta
return cloaked_image_X return cloaked_image_X
def save_results(RES, path):
pickle.dump(RES, open(path, "wb"))
def perform_defense(): def perform_defense():
RES = {} RES = {}
sess = init_gpu(args.gpu) sess = init_gpu(args.gpu)
DSSIM_THRESHOLD = args.th
FEATURE_EXTRACTORS = [IDX2MODEL[args.model_idx]]
MODEL_HASH = "".join(MODEL_IDX[m] for m in FEATURE_EXTRACTORS)
FEATURE_EXTRACTORS = [args.feature_extractor]
RES_DIR = '../results/' RES_DIR = '../results/'
RES['num_img_protected'] = NUM_IMG_PROTECTED RES['num_img_protected'] = NUM_IMG_PROTECTED
RES['extractors'] = FEATURE_EXTRACTORS RES['extractors'] = FEATURE_EXTRACTORS
num_protect = NUM_IMG_PROTECTED num_protect = NUM_IMG_PROTECTED
print(FEATURE_EXTRACTORS) print("Loading {} for optimization".format(args.feature_extractor))
feature_extractors_ls = [load_extractor(name) for name in FEATURE_EXTRACTORS] feature_extractors_ls = [load_extractor(name, layer_idx=args.layer_idx) for name in FEATURE_EXTRACTORS]
protect_class = args.protect_class protect_class = args.protect_class
cloak_data = CloakData(args.dataset, target_selection_tries=1, protect_class=protect_class) cloak_data = CloakData(args.dataset, target_selection_tries=1, protect_class=protect_class)
model_name = args.feature_extractor.split("/")[-1].split('.')[0].replace("_extract", "")
RES_FILE_NAME = "{}_{}_protect{}_th{}_sd{}".format(args.dataset, model_name, cloak_data.protect_class, args.th,
args.sd)
RES_FILE_NAME = os.path.join(RES_DIR, RES_FILE_NAME)
if os.path.exists(RES_FILE_NAME):
exit(1)
print("Protect Class: ", cloak_data.protect_class) print("Protect Class: ", cloak_data.protect_class)
if "robust" in FEATURE_EXTRACTORS[0]:
non_robust = MODEL_IDX["_".join(FEATURE_EXTRACTORS[0].split("_")[:2])]
if args.dataset == 'pubfig':
CLOAK_DIR = 'pubfig_tm{}_tgt57_r1.0_th0.01'.format(non_robust)
CLOAK_DIR = os.path.join(RES_DIR, CLOAK_DIR)
RES = pickle.load(open(os.path.join(CLOAK_DIR, "cloak_data.p"), 'rb'))
cloak_data = RES['cloak_data']
elif args.dataset == 'scrub':
CLOAK_DIR = 'scrub_tm{}_tgtPatrick_Dempsey_r1.0_th0.01'.format(non_robust)
CLOAK_DIR = os.path.join(RES_DIR, CLOAK_DIR)
RES = pickle.load(open(os.path.join(CLOAK_DIR, "cloak_data.p"), 'rb'))
cloak_data = RES['cloak_data']
else:
cloak_data.target_path, cloak_data.target_data = cloak_data.select_target_label(feature_extractors_ls, cloak_data.target_path, cloak_data.target_data = cloak_data.select_target_label(feature_extractors_ls,
FEATURE_EXTRACTORS) FEATURE_EXTRACTORS)
RES_FILE_NAME = "{}_tm{}_tgt{}_r{}_th{}".format(args.dataset, MODEL_HASH, cloak_data.protect_class, RATIO, os.makedirs(RES_DIR, exist_ok=True)
DSSIM_THRESHOLD)
RES_FILE_NAME = os.path.join(RES_DIR, RES_FILE_NAME)
os.makedirs(RES_FILE_NAME, exist_ok=True) os.makedirs(RES_FILE_NAME, exist_ok=True)
print("Protect Current Label Data...")
cloak_image_X = diff_protected_data(sess, feature_extractors_ls, cloak_data.protect_train_X, cloak_image_X = diff_protected_data(sess, feature_extractors_ls, cloak_data.protect_train_X,
number_protect=num_protect, number_protect=num_protect,
target_X=cloak_data.target_data, sybil=False, th=DSSIM_THRESHOLD) target_X=cloak_data.target_data, th=args.th)
cloak_data.cloaked_protect_train_X = cloak_image_X cloak_data.cloaked_protect_train_X = cloak_image_X
RES['cloak_data'] = cloak_data RES['cloak_data'] = cloak_data
save_results(RES, os.path.join(RES_FILE_NAME, 'cloak_data.p')) pickle.dump(RES, open(os.path.join(RES_FILE_NAME, 'cloak_data.p'), "wb"))
def parse_arguments(argv): def parse_arguments(argv):
@ -124,11 +85,18 @@ def parse_arguments(argv):
parser.add_argument('--gpu', type=str, parser.add_argument('--gpu', type=str,
help='GPU id', default='0') help='GPU id', default='0')
parser.add_argument('--dataset', type=str, parser.add_argument('--dataset', type=str,
help='name of dataset', default='pubfig') help='name of dataset', default='scrub')
parser.add_argument('--model_idx', type=str, parser.add_argument('--feature-extractor', type=str,
help='teacher model index', default="3") help="name of the feature extractor used for optimization",
default="../feature_extractors/webface_dense_robust_extract.h5")
parser.add_argument('--layer-idx', type=int,
help="the idx of the layer of neuron that are used as feature space",
default=-3)
parser.add_argument('--th', type=float, default=0.01) parser.add_argument('--th', type=float, default=0.01)
parser.add_argument('--sd', type=int, default=1e4)
parser.add_argument('--protect_class', type=str, default=None) parser.add_argument('--protect_class', type=str, default=None)
parser.add_argument('--lr', type=float, default=0.1)
return parser.parse_args(argv) return parser.parse_args(argv)

View File

@ -1,3 +1,4 @@
import json
import os import os
import pickle import pickle
import random import random
@ -7,11 +8,26 @@ import keras.backend as K
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
from keras.applications.vgg16 import preprocess_input from keras.applications.vgg16 import preprocess_input
from keras.layers import Dense, Activation
from keras.models import Model
from keras.preprocessing import image from keras.preprocessing import image
from keras.utils import to_categorical from keras.utils import to_categorical
from sklearn.metrics import pairwise_distances from sklearn.metrics import pairwise_distances
def clip_img(X, preprocessing='raw'):
X = reverse_preprocess(X, preprocessing)
X = np.clip(X, 0.0, 255.0)
X = preprocess(X, preprocessing)
return X
def dump_dictionary_as_json(dict, outfile):
j = json.dumps(dict)
with open(outfile, "wb") as f:
f.write(j.encode())
def fix_gpu_memory(mem_fraction=1): def fix_gpu_memory(mem_fraction=1):
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_fraction) gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_fraction)
@ -25,6 +41,19 @@ def fix_gpu_memory(mem_fraction=1):
return sess return sess
def load_victim_model(number_classes, teacher_model=None, end2end=False):
for l in teacher_model.layers:
l.trainable = end2end
x = teacher_model.layers[-1].output
x = Dense(number_classes)(x)
x = Activation('softmax', name="act")(x)
model = Model(teacher_model.input, x)
opt = keras.optimizers.Adadelta()
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
return model
def init_gpu(gpu_index, force=False): def init_gpu(gpu_index, force=False):
if isinstance(gpu_index, list): if isinstance(gpu_index, list):
gpu_num = ','.join([str(i) for i in gpu_index]) gpu_num = ','.join([str(i) for i in gpu_index])
@ -152,93 +181,43 @@ def imagenet_reverse_preprocessing(x, data_format=None):
return x return x
def imagenet_reverse_preprocessing_cntk(x, data_format=None): def build_bottleneck_model(model, cut_off):
import keras.backend as K bottleneck_model = Model(model.input, model.get_layer(cut_off).output)
""" Reverse preprocesses a tensor encoding a batch of images. bottleneck_model.compile(loss='categorical_crossentropy',
# Arguments optimizer='adam',
x: input Numpy tensor, 4D. metrics=['accuracy'])
data_format: data format of the image tensor. return bottleneck_model
# Returns
Preprocessed tensor.
"""
x = np.array(x)
if data_format is None:
data_format = K.image_data_format()
assert data_format in ('channels_last', 'channels_first')
if data_format == 'channels_first':
# Zero-center by mean pixel def load_extractor(name, layer_idx=None):
x[:, 0, :, :] += 114.0 model = keras.models.load_model(name)
x[:, 1, :, :] += 114.0
x[:, 2, :, :] += 114.0 if "extract" in name.split("/")[-1]:
# 'BGR'->'RGB' model = keras.models.load_model(name)
x = x[:, ::-1, :, :]
else: else:
# Zero-center by mean pixel print("Convert a model to a feature extractor")
x[:, :, :, 0] += 114.0 model = build_bottleneck_model(model, model.layers[layer_idx].name)
x[:, :, :, 1] += 114.0 model.save(name + "extract")
x[:, :, :, 2] += 114.0 model = keras.models.load_model(name + "extract")
# 'BGR'->'RGB'
x = x[:, :, :, ::-1]
return x
def load_extractor(name):
model = keras.models.load_model("/home/shansixioing/cloak/models/extractors/{}_extract.h5".format(name))
return model return model
def get_dataset_path(dataset): def get_dataset_path(dataset):
if dataset == "webface": if dataset == "scrub":
train_data_dir = '/mnt/data/sixiongshan/data/webface/train' train_data_dir = '../data/scrub/train'
test_data_dir = '/mnt/data/sixiongshan/data/webface/test' test_data_dir = '../data/scrub/test'
number_classes = 10575
number_samples = 475137
elif dataset == "vggface1":
train_data_dir = '/mnt/data/sixiongshan/data/vggface/train'
test_data_dir = '/mnt/data/sixiongshan/data/vggface/test'
number_classes = 2622
number_samples = 1716436 // 3
elif dataset == "vggface2":
train_data_dir = '/mnt/data/sixiongshan/data/vggface2/train'
test_data_dir = '/mnt/data/sixiongshan/data/vggface2/test'
number_classes = 8631
number_samples = 3141890 // 3
elif dataset == "scrub":
train_data_dir = '/mnt/data/sixiongshan/data/facescrub/keras_flow_dir/train'
test_data_dir = '/mnt/data/sixiongshan/data/facescrub/keras_flow_dir/test'
number_classes = 530 number_classes = 530
number_samples = 57838 number_samples = 57838
elif dataset == "youtubeface":
train_data_dir = '/mnt/data/sixiongshan/data/youtubeface/keras_flow_data/train_mtcnnpy_224'
test_data_dir = '/mnt/data/sixiongshan/data/youtubeface/keras_flow_data/test_mtcnnpy_224'
number_classes = 1283
number_samples = 587137 // 5
elif dataset == "emily":
train_data_dir = '/mnt/data/sixiongshan/data/emface/train'
test_data_dir = '/mnt/data/sixiongshan/data/emface/test'
number_classes = 66
number_samples = 6070
elif dataset == "pubfig": elif dataset == "pubfig":
train_data_dir = '/mnt/data/sixiongshan/data/pubfig/train' train_data_dir = '../data/pubfig/train'
test_data_dir = '/mnt/data/sixiongshan/data/pubfig/test' test_data_dir = '../data/pubfig/test'
number_classes = 65 number_classes = 65
number_samples = 5979 number_samples = 5979
elif dataset == "iris":
train_data_dir = '/mnt/data/sixiongshan/data/iris/train'
test_data_dir = '/mnt/data/sixiongshan/data/iris/test'
number_classes = 1000
number_samples = 14000
else: else:
print("Dataset {} does not exist... Abort".format(dataset)) raise Exception(
exit(1) "Dataset {} does not exist, please download to data/ and add the path to this function... Abort".format(
dataset))
return train_data_dir, test_data_dir, number_classes, number_samples return train_data_dir, test_data_dir, number_classes, number_samples
@ -261,7 +240,6 @@ class CloakData(object):
self.protect_class = random.choice(self.all_labels) self.protect_class = random.choice(self.all_labels)
self.sybil_class = random.choice([l for l in self.all_labels if l != self.protect_class]) self.sybil_class = random.choice([l for l in self.all_labels if l != self.protect_class])
print("Protect label: {} | Sybil label: {}".format(self.protect_class, self.sybil_class))
self.protect_train_X, self.protect_test_X = self.load_label_data(self.protect_class) self.protect_train_X, self.protect_test_X = self.load_label_data(self.protect_class)
self.sybil_train_X, self.sybil_test_X = self.load_label_data(self.sybil_class) self.sybil_train_X, self.sybil_test_X = self.load_label_data(self.sybil_class)
@ -290,11 +268,11 @@ class CloakData(object):
def load_embeddings(self, feature_extractors_names): def load_embeddings(self, feature_extractors_names):
dictionaries = [] dictionaries = []
for extractor_name in feature_extractors_names: for extractor_name in feature_extractors_names:
path2emb = pickle.load(open("/home/shansixioing/cloak/embs/{}_emb_norm.p".format(extractor_name), "rb")) extractor_name = extractor_name.split("/")[-1].split('.')[0].replace("_extract", "")
# path2emb = pickle.load(open("/home/shansixioing/cloak/embs/vggface2_inception_emb.p".format(extractor_name), "rb")) path2emb = pickle.load(open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb"))
dictionaries.append(path2emb) dictionaries.append(path2emb)
merge_dict = {} merge_dict = {}
for k in dictionaries[0].keys(): for k in dictionaries[0].keys():
cur_emb = [dic[k] for dic in dictionaries] cur_emb = [dic[k] for dic in dictionaries]