mirror of https://github.com/Shawn-Shan/fawkes.git synced 2024-11-12 02:01:30 +05:30

add fawkes-lite and fawkes-dev

Shawn-Shan 2020-06-28 19:13:14 -05:00
parent 5b15437b69
commit 7fc3a82437
11 changed files with 1092 additions and 86 deletions

@@ -136,18 +136,21 @@ class FawkesMaskGeneration:
         def batch_gen_DSSIM(aimg_raw_split, simg_raw_split):
             msssim_split = tf.image.ssim(aimg_raw_split, simg_raw_split, max_val=255.0)
             dist = (1.0 - tf.stack(msssim_split)) / 2.0
+            # dist = tf.square(aimg_raw_split - simg_raw_split)
             return dist

         # raw value of DSSIM distance
         self.dist_raw = batch_gen_DSSIM(self.aimg_raw, self.simg_raw)
         # distance value after applying threshold
         self.dist = tf.maximum(self.dist_raw - self.l_threshold, 0.0)
+        # self.dist = self.dist_raw
         self.dist_raw_sum = tf.reduce_sum(
             tf.where(self.mask,
                      self.dist_raw,
                      tf.zeros_like(self.dist_raw)))
         self.dist_sum = tf.reduce_sum(tf.where(self.mask, self.dist, tf.zeros_like(self.dist)))
+        # self.dist_sum = 1e-5 * tf.reduce_sum(self.dist)
+        # self.dist_raw_sum = self.dist_sum

         def resize_tensor(input_tensor, model_input_shape):
             if input_tensor.shape[1:] == model_input_shape or model_input_shape[1] is None:
@@ -158,12 +161,6 @@ class FawkesMaskGeneration:
         def calculate_direction(bottleneck_model, cur_timg_input, cur_simg_input):
             target_features = bottleneck_model(cur_timg_input)
             return target_features
-            # target_center = tf.reduce_mean(target_features, axis=0)
-            # original = bottleneck_model(cur_simg_input)
-            # original_center = tf.reduce_mean(original, axis=0)
-            # direction = target_center - original_center
-            # final_target = original + self.ratio * direction
-            # return final_target

         self.bottlesim = 0.0
         self.bottlesim_sum = 0.0
@@ -201,7 +198,14 @@ class FawkesMaskGeneration:
         else:
             self.loss = self.const * tf.square(self.dist) + self.bottlesim

-        self.loss_sum = tf.reduce_sum(tf.where(self.mask, self.loss, tf.zeros_like(self.loss)))
+        self.loss_sum = tf.reduce_sum(tf.where(self.mask,
+                                               self.loss,
+                                               tf.zeros_like(self.loss)))
+        # self.loss_sum = self.dist_sum + tf.reduce_sum(self.bottlesim)
+        # import pdb
+        # pdb.set_trace()
+        # self.loss_sum = tf.reduce_sum(tf.where(self.mask, self.loss, tf.zeros_like(self.loss)))

         # Setup the Adadelta optimizer and keep track of variables
         # we're creating
@@ -321,25 +325,13 @@ class FawkesMaskGeneration:
         weights_batch[:nb_imgs] = weights[:nb_imgs]
         modifier_batch = np.ones(self.input_shape) * 1e-6

-        # set the variables so that we don't have to send them over again
-        if self.MIMIC_IMG:
-            self.sess.run(self.setup,
-                          {self.assign_timg_tanh: timg_tanh_batch,
-                           self.assign_simg_tanh: simg_tanh_batch,
-                           self.assign_const: CONST,
-                           self.assign_mask: mask,
-                           self.assign_weights: weights_batch,
-                           self.assign_modifier: modifier_batch})
-        else:
-            # if directly mimicking a vector, use assign_bottleneck_t_raw
-            # in setup
-            self.sess.run(self.setup,
-                          {self.assign_bottleneck_t_raw: timg_tanh_batch,
-                           self.assign_simg_tanh: simg_tanh_batch,
-                           self.assign_const: CONST,
-                           self.assign_mask: mask,
-                           self.assign_weights: weights_batch,
-                           self.assign_modifier: modifier_batch})
+        self.sess.run(self.setup,
+                      {self.assign_timg_tanh: timg_tanh_batch,
+                       self.assign_simg_tanh: simg_tanh_batch,
+                       self.assign_const: CONST,
+                       self.assign_mask: mask,
+                       self.assign_weights: weights_batch,
+                       self.assign_modifier: modifier_batch})

         best_bottlesim = [0] * nb_imgs if self.maximize else [np.inf] * nb_imgs
         best_adv = np.zeros_like(source_imgs)
@@ -390,7 +382,7 @@ class FawkesMaskGeneration:
                         best_adv[e] = aimg_input

                 if iteration != 0 and iteration % (self.MAX_ITERATIONS // 3) == 0:
-                    LR = LR / 2
+                    # LR = LR / 2
                     print("Learning Rate: ", LR)

                 if iteration % (self.MAX_ITERATIONS // 10) == 0:
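
The hunks above tune the DSSIM budget that keeps cloaks imperceptible. For reference, a minimal standalone sketch of the same distance (TF 1.x graph mode, as used throughout this repo; the 0.03 threshold mirrors the class default `L_THRESHOLD`):

```
import tensorflow as tf

# Perturbed and source images as raw pixels in [0, 255].
aimg = tf.placeholder(tf.float32, (None, 224, 224, 3))
simg = tf.placeholder(tf.float32, (None, 224, 224, 3))

# SSIM mapped to a distance: 0 for identical images, larger = more visible.
dssim = (1.0 - tf.image.ssim(aimg, simg, max_val=255.0)) / 2.0

# Only the excess over the perceptual budget contributes to the loss.
penalty = tf.maximum(dssim - 0.03, 0.0)
```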

@@ -1,28 +1,29 @@
 import argparse
+import glob
 import os
-import pickle
 import random
 import sys

 import numpy as np
 from differentiator import FawkesMaskGeneration
+from keras.applications.vgg16 import preprocess_input
+from keras.preprocessing import image
+from skimage.transform import resize
 from tensorflow import set_random_seed
-from utils import load_extractor, CloakData, init_gpu
+from utils import load_extractor, init_gpu, select_target_label, dump_image, reverse_process_cloaked

 random.seed(12243)
 np.random.seed(122412)
 set_random_seed(12242)

-NUM_IMG_PROTECTED = 32  # Number of images used to optimize the target class
 BATCH_SIZE = 32
 MAX_ITER = 1000


-def diff_protected_data(sess, feature_extractors_ls, image_X, number_protect, target_X=None, th=0.01):
-    image_X = image_X[:number_protect]
-    differentiator = FawkesMaskGeneration(sess, feature_extractors_ls,
-                                          batch_size=BATCH_SIZE,
+def generate_cloak_images(sess, feature_extractors, image_X, target_X=None, th=0.01):
+    batch_size = BATCH_SIZE if len(image_X) > BATCH_SIZE else len(image_X)
+    differentiator = FawkesMaskGeneration(sess, feature_extractors,
+                                          batch_size=batch_size,
                                           mimic_img=True,
                                           intensity_range='imagenet',
                                           initial_const=args.sd,
@@ -31,65 +32,81 @@ def diff_protected_data(sess, feature_extractors_ls, image_X, number_protect, ta
                                           l_threshold=th,
                                           verbose=1, maximize=False, keep_final=False, image_shape=image_X.shape[1:])

-    if len(target_X) < len(image_X):
-        target_X = np.concatenate([target_X, target_X, target_X])
-    target_X = target_X[:len(image_X)]
     cloaked_image_X = differentiator.attack(image_X, target_X)
     return cloaked_image_X


-def perform_defense():
-    RES = {}
+def extract_faces(img):
+    # foo
+    return preprocess_input(resize(img, (224, 224)))
+
+
+def fawkes():
+    assert os.path.exists(args.directory)
+    assert os.path.isdir(args.directory)
     sess = init_gpu(args.gpu)
-    FEATURE_EXTRACTORS = [args.feature_extractor]
-    RES_DIR = '../results/'
-    RES['num_img_protected'] = NUM_IMG_PROTECTED
-    RES['extractors'] = FEATURE_EXTRACTORS
-    num_protect = NUM_IMG_PROTECTED

     print("Loading {} for optimization".format(args.feature_extractor))
-    feature_extractors_ls = [load_extractor(name) for name in FEATURE_EXTRACTORS]
-    protect_class = args.protect_class
-    cloak_data = CloakData(args.dataset, protect_class=protect_class)
-    RES_FILE_NAME = "{}_{}_protect{}".format(args.dataset, args.feature_extractor, cloak_data.protect_class)
-    RES_FILE_NAME = os.path.join(RES_DIR, RES_FILE_NAME)
-    print("Protect Class: ", cloak_data.protect_class)
-    cloak_data.target_path, cloak_data.target_data = cloak_data.select_target_label(feature_extractors_ls,
-                                                                                    FEATURE_EXTRACTORS)
-    os.makedirs(RES_DIR, exist_ok=True)
-    os.makedirs(RES_FILE_NAME, exist_ok=True)
-    cloak_image_X = diff_protected_data(sess, feature_extractors_ls, cloak_data.protect_train_X,
-                                        number_protect=num_protect,
-                                        target_X=cloak_data.target_data, th=args.th)
-    cloak_data.cloaked_protect_train_X = cloak_image_X
-    RES['cloak_data'] = cloak_data
-    pickle.dump(RES, open(os.path.join(RES_FILE_NAME, 'cloak_data.p'), "wb"))
+    feature_extractors_ls = [load_extractor(args.feature_extractor)]
+
+    image_paths = glob.glob(os.path.join(args.directory, "*"))
+    image_paths = [path for path in image_paths if "_cloaked" not in path.split("/")[-1]]
+    orginal_images = [extract_faces(image.img_to_array(image.load_img(cur_path))) for cur_path in
+                      image_paths]
+    orginal_images = np.array(orginal_images)
+
+    if args.seperate_target:
+        target_images = []
+        for org_img in orginal_images:
+            org_img = org_img.reshape([1] + list(org_img.shape))
+            tar_img = select_target_label(org_img, feature_extractors_ls, [args.feature_extractor])
+            target_images.append(tar_img)
+        target_images = np.concatenate(target_images)
+        # import pdb
+        # pdb.set_trace()
+    else:
+        target_images = select_target_label(orginal_images, feature_extractors_ls, [args.feature_extractor])
+
+    # file_name = args.directory.split("/")[-1]
+    # os.makedirs(args.result_directory, exist_ok=True)
+    # os.makedirs(os.path.join(args.result_directory, file_name), exist_ok=True)
+    protected_images = generate_cloak_images(sess, feature_extractors_ls, orginal_images,
+                                             target_X=target_images, th=args.th)
+
+    for p_img, path in zip(protected_images, image_paths):
+        p_img = reverse_process_cloaked(p_img)
+        # img_type = path.split(".")[-1]
+        file_name = "{}_cloaked.jpeg".format(".".join(path.split(".")[:-1]))
+        dump_image(p_img, file_name, format="JPEG")


 def parse_arguments(argv):
     parser = argparse.ArgumentParser()
     parser.add_argument('--gpu', type=str,
                         help='GPU id', default='0')
-    parser.add_argument('--dataset', type=str,
-                        help='name of dataset', default='scrub')
+    parser.add_argument('--directory', type=str,
+                        help='directory that contain images for cloaking', default='imgs/')
     parser.add_argument('--feature-extractor', type=str,
                         help="name of the feature extractor used for optimization",
                         default="webface_dense_robust_extract")
-    parser.add_argument('--th', type=float, default=0.007)
-    parser.add_argument('--sd', type=int, default=1e5)
+    parser.add_argument('--th', type=float, default=0.005)
+    parser.add_argument('--sd', type=int, default=1e10)
     parser.add_argument('--protect_class', type=str, default=None)
     parser.add_argument('--lr', type=float, default=0.1)
+    parser.add_argument('--result_directory', type=str, default="../results")
+    parser.add_argument('--seperate_target', action='store_true')
     return parser.parse_args(argv)


 if __name__ == '__main__':
     args = parse_arguments(sys.argv[1:])
-    perform_defense()
+    fawkes()
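
Judging from the rewritten argument parser, cloaking a folder of photos becomes a single command; a hypothetical invocation (directory and threshold values are illustrative, not from the commit):

```
python3 protection.py --gpu 0 --directory imgs/ --th 0.005
```

Each input image gains a `_cloaked.jpeg` sibling on success; passing `--seperate_target` selects a target class per image instead of one shared target set for the whole batch.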

@@ -141,7 +141,6 @@ def imagenet_preprocessing(x, data_format=None):
     return x

-
 def imagenet_reverse_preprocessing(x, data_format=None):
     import keras.backend as K
     x = np.array(x)

@@ -172,6 +171,11 @@ def imagenet_reverse_preprocessing(x, data_format=None):
     return x


+def reverse_process_cloaked(x, preprocess='imagenet'):
+    x = clip_img(x, preprocess)
+    return reverse_preprocess(x, preprocess)
+
+
 def build_bottleneck_model(model, cut_off):
     bottleneck_model = Model(model.input, model.get_layer(cut_off).output)
     bottleneck_model.compile(loss='categorical_crossentropy',
@@ -212,32 +216,116 @@ def normalize(x):
     return x / np.linalg.norm(x, axis=1, keepdims=True)


+def dump_image(x, filename, format="png", scale=False):
+    img = image.array_to_img(x, scale=scale)
+    img.save(filename, format)
+    return
+
+
+def load_dir(path):
+    assert os.path.exists(path)
+    x_ls = []
+    for file in os.listdir(path):
+        cur_path = os.path.join(path, file)
+        im = image.load_img(cur_path, target_size=(224, 224))
+        im = image.img_to_array(im)
+        x_ls.append(im)
+    raw_x = np.array(x_ls)
+    return preprocess_input(raw_x)
+
+
+def load_embeddings(feature_extractors_names):
+    dictionaries = []
+    for extractor_name in feature_extractors_names:
+        path2emb = pickle.load(open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb"))
+        dictionaries.append(path2emb)
+
+    merge_dict = {}
+    for k in dictionaries[0].keys():
+        cur_emb = [dic[k] for dic in dictionaries]
+        merge_dict[k] = np.concatenate(cur_emb)
+    return merge_dict
+
+
+def extractor_ls_predict(feature_extractors_ls, X):
+    feature_ls = []
+    for extractor in feature_extractors_ls:
+        cur_features = extractor.predict(X)
+        feature_ls.append(cur_features)
+    concated_feature_ls = np.concatenate(feature_ls, axis=1)
+    concated_feature_ls = normalize(concated_feature_ls)
+    return concated_feature_ls
+
+
+def calculate_dist_score(a, b, feature_extractors_ls, metric='l2'):
+    features1 = extractor_ls_predict(feature_extractors_ls, a)
+    features2 = extractor_ls_predict(feature_extractors_ls, b)
+
+    pair_cos = pairwise_distances(features1, features2, metric)
+    max_sum = np.min(pair_cos, axis=0)
+    max_sum_arg = np.argsort(max_sum)[::-1]
+    max_sum_arg = max_sum_arg[:len(a)]
+    max_sum = [max_sum[i] for i in max_sum_arg]
+    paired_target_X = [b[j] for j in max_sum_arg]
+    paired_target_X = np.array(paired_target_X)
+    return np.min(max_sum), paired_target_X
+
+
+def select_target_label(imgs, feature_extractors_ls, feature_extractors_names, metric='l2'):
+    original_feature_x = extractor_ls_predict(feature_extractors_ls, imgs)
+
+    path2emb = load_embeddings(feature_extractors_names)
+    items = list(path2emb.items())
+    paths = [p[0] for p in items]
+    embs = [p[1] for p in items]
+    embs = np.array(embs)
+
+    pair_dist = pairwise_distances(original_feature_x, embs, metric)
+    max_sum = np.min(pair_dist, axis=0)
+    sorted_idx = np.argsort(max_sum)[::-1]
+
+    highest_num = 0
+    paired_target_X = None
+    final_target_class_path = None
+    for idx in sorted_idx[:1]:
+        target_class_path = paths[idx]
+        cur_target_X = load_dir(target_class_path)
+        cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X])
+        cur_tot_sum, cur_paired_target_X = calculate_dist_score(imgs, cur_target_X,
+                                                                feature_extractors_ls,
+                                                                metric=metric)
+        if cur_tot_sum > highest_num:
+            highest_num = cur_tot_sum
+            paired_target_X = cur_paired_target_X
+            final_target_class_path = target_class_path
+
+    np.random.shuffle(paired_target_X)
+    paired_target_X = list(paired_target_X)
+    while len(paired_target_X) < len(imgs):
+        paired_target_X += paired_target_X
+    paired_target_X = paired_target_X[:len(imgs)]
+    return np.array(paired_target_X)
 class CloakData(object):
-    def __init__(self, dataset, img_shape=(224, 224), protect_class=None):
-        self.dataset = dataset
+    def __init__(self, protect_directory=None, img_shape=(224, 224)):
         self.img_shape = img_shape
-        self.train_data_dir, self.test_data_dir, self.number_classes, self.number_samples = get_dataset_path(dataset)
-        self.all_labels = sorted(list(os.listdir(self.train_data_dir)))
-        if protect_class:
-            self.protect_class = protect_class
-        else:
-            self.protect_class = random.choice(self.all_labels)
-        self.sybil_class = random.choice([l for l in self.all_labels if l != self.protect_class])
-        self.protect_train_X, self.protect_test_X = self.load_label_data(self.protect_class)
-        self.sybil_train_X, self.sybil_test_X = self.load_label_data(self.sybil_class)
+        # self.train_data_dir, self.test_data_dir, self.number_classes, self.number_samples = get_dataset_path(dataset)
+        # self.all_labels = sorted(list(os.listdir(self.train_data_dir)))
+        self.protect_directory = protect_directory
+
+        self.protect_X = self.load_label_data(self.protect_directory)

         self.cloaked_protect_train_X = None
-        self.cloaked_sybil_train_X = None
         self.label2path_train, self.label2path_test, self.path2idx = self.build_data_mapping()
         self.all_training_path = self.get_all_data_path(self.label2path_train)
         self.all_test_path = self.get_all_data_path(self.label2path_test)
         self.protect_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.protect_class))
-        self.sybil_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.sybil_class))
-        print("Find {} protect images".format(len(self.protect_class_path)))

     def get_class_image_files(self, path):
         return [os.path.join(path, f) for f in os.listdir(path)]
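
The new module-level `select_target_label` added above encodes the core target-picking heuristic: for every candidate class embedding, measure how close it gets to any of the user's features, then prefer the candidate whose closest approach is still far away. A NumPy-only sketch of that ranking (all arrays are stand-ins for the real features):

```
import numpy as np
from sklearn.metrics import pairwise_distances

user_feats = np.random.rand(8, 128)    # stand-in extractor features (L2-normalized in the real code)
cand_embs = np.random.rand(100, 128)   # stand-in candidate class embeddings

d = pairwise_distances(user_feats, cand_embs, metric="l2")  # shape (8, 100)
closest = d.min(axis=0)                 # each candidate's closest approach to the user
ranking = np.argsort(closest)[::-1]     # most dissimilar candidates first
```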

@@ -43,4 +43,15 @@ The code will output a directory in `results/` with `cloak_data.p` inside. You c
 #### Evaluate Cloak Effectiveness
 To evaluate the cloak, run `python3 fawkes/eval_cloak.py --gpu 0 --cloak_data PATH-TO-RESULT-DIRECTORY --transfer_model vggface2_inception_extract`.

 The code will print out the tracker model accuracy on uncloaked/original test images of the protected user, which should be close to 0.
+
+### Citation
+```
+@inproceedings{shan2020fawkes,
+  title={Fawkes: Protecting Personal Privacy against Unauthorized Deep Learning Models},
+  author={Shan, Shawn and Wenger, Emily and Zhang, Jiayun and Li, Huiying and Zheng, Haitao and Zhao, Ben Y},
+  booktitle={Proc. of USENIX Security},
+  year={2020}
+}
+```

fawkes_dev/__init__.py (new, empty file)

@@ -0,0 +1,431 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date : 2020-05-17
# @Author : Shawn Shan (shansixiong@cs.uchicago.edu)
# @Link : https://www.shawnshan.com/
import datetime
import time
from decimal import Decimal
import numpy as np
import tensorflow as tf
from utils import preprocess, reverse_preprocess
class FawkesMaskGeneration:
# if the attack is trying to mimic a target image or a neuron vector
MIMIC_IMG = True
# number of iterations to perform gradient descent
MAX_ITERATIONS = 10000
# larger values converge faster to less accurate results
LEARNING_RATE = 1e-2
# the initial constant c to pick as a first guess
INITIAL_CONST = 1
# pixel intensity range
INTENSITY_RANGE = 'imagenet'
# threshold for distance
L_THRESHOLD = 0.03
# whether keep the final result or the best result
KEEP_FINAL = False
# max_val of image
MAX_VAL = 255
# The following variables are used by DSSIM, should keep as default
# filter size in SSIM
FILTER_SIZE = 11
# filter sigma in SSIM
FILTER_SIGMA = 1.5
# weights used in MS-SSIM
SCALE_WEIGHTS = None
MAXIMIZE = False
IMAGE_SHAPE = (224, 224, 3)
RATIO = 1.0
LIMIT_DIST = False
def __init__(self, sess, bottleneck_model_ls, mimic_img=MIMIC_IMG,
batch_size=1, learning_rate=LEARNING_RATE,
max_iterations=MAX_ITERATIONS, initial_const=INITIAL_CONST,
intensity_range=INTENSITY_RANGE, l_threshold=L_THRESHOLD,
max_val=MAX_VAL, keep_final=KEEP_FINAL, maximize=MAXIMIZE, image_shape=IMAGE_SHAPE,
verbose=0, ratio=RATIO, limit_dist=LIMIT_DIST):
assert intensity_range in {'raw', 'imagenet', 'inception', 'mnist'}
# constant used for tanh transformation to avoid corner cases
self.tanh_constant = 2 - 1e-6
self.sess = sess
self.MIMIC_IMG = mimic_img
self.LEARNING_RATE = learning_rate
self.MAX_ITERATIONS = max_iterations
self.initial_const = initial_const
self.batch_size = batch_size
self.intensity_range = intensity_range
self.l_threshold = l_threshold
self.max_val = max_val
self.keep_final = keep_final
self.verbose = verbose
self.maximize = maximize
self.learning_rate = learning_rate
self.ratio = ratio
self.limit_dist = limit_dist
self.single_shape = list(image_shape)
self.input_shape = tuple([self.batch_size] + self.single_shape)
self.bottleneck_shape = tuple([self.batch_size] + self.single_shape)
# the variable we're going to optimize over
self.modifier = tf.Variable(np.zeros(self.input_shape, dtype=np.float32))
# target image in tanh space
if self.MIMIC_IMG:
self.timg_tanh = tf.Variable(np.zeros(self.input_shape), dtype=np.float32)
else:
self.bottleneck_t_raw = tf.Variable(np.zeros(self.bottleneck_shape), dtype=np.float32)
# source image in tanh space
self.simg_tanh = tf.Variable(np.zeros(self.input_shape), dtype=np.float32)
self.const = tf.Variable(np.ones(batch_size), dtype=np.float32)
self.mask = tf.Variable(np.ones((batch_size), dtype=np.bool))
self.weights = tf.Variable(np.ones(self.bottleneck_shape,
dtype=np.float32))
# and here's what we use to assign them
self.assign_modifier = tf.placeholder(tf.float32, self.input_shape)
if self.MIMIC_IMG:
self.assign_timg_tanh = tf.placeholder(
tf.float32, self.input_shape)
else:
self.assign_bottleneck_t_raw = tf.placeholder(
tf.float32, self.bottleneck_shape)
self.assign_simg_tanh = tf.placeholder(tf.float32, self.input_shape)
self.assign_const = tf.placeholder(tf.float32, (batch_size))
self.assign_mask = tf.placeholder(tf.bool, (batch_size))
self.assign_weights = tf.placeholder(tf.float32, self.bottleneck_shape)
# the resulting image, tanh'd to keep bounded from -0.5 to 0.5
# adversarial image in raw space
self.aimg_raw = (tf.tanh(self.modifier + self.simg_tanh) /
self.tanh_constant +
0.5) * 255.0
# source image in raw space
self.simg_raw = (tf.tanh(self.simg_tanh) /
self.tanh_constant +
0.5) * 255.0
if self.MIMIC_IMG:
# target image in raw space
self.timg_raw = (tf.tanh(self.timg_tanh) /
self.tanh_constant +
0.5) * 255.0
# convert source and adversarial image into input space
if self.intensity_range == 'imagenet':
mean = tf.constant(np.repeat([[[[103.939, 116.779, 123.68]]]], self.batch_size, axis=0), dtype=tf.float32,
name='img_mean')
self.aimg_input = (self.aimg_raw[..., ::-1] - mean)
self.simg_input = (self.simg_raw[..., ::-1] - mean)
if self.MIMIC_IMG:
self.timg_input = (self.timg_raw[..., ::-1] - mean)
elif self.intensity_range == 'raw':
self.aimg_input = self.aimg_raw
self.simg_input = self.simg_raw
if self.MIMIC_IMG:
self.timg_input = self.timg_raw
def batch_gen_DSSIM(aimg_raw_split, simg_raw_split):
msssim_split = tf.image.ssim(aimg_raw_split, simg_raw_split, max_val=255.0)
dist = (1.0 - tf.stack(msssim_split)) / 2.0
return dist
# raw value of DSSIM distance
self.dist_raw = batch_gen_DSSIM(self.aimg_raw, self.simg_raw)
# distance value after applying threshold
self.dist = tf.maximum(self.dist_raw - self.l_threshold, 0.0)
self.dist_raw_sum = tf.reduce_sum(
tf.where(self.mask,
self.dist_raw,
tf.zeros_like(self.dist_raw)))
self.dist_sum = tf.reduce_sum(tf.where(self.mask, self.dist, tf.zeros_like(self.dist)))
def resize_tensor(input_tensor, model_input_shape):
if input_tensor.shape[1:] == model_input_shape or model_input_shape[1] is None:
return input_tensor
resized_tensor = tf.image.resize(input_tensor, model_input_shape[:2])
return resized_tensor
def calculate_direction(bottleneck_model, cur_timg_input, cur_simg_input):
target_features = bottleneck_model(cur_timg_input)
return target_features
# target_center = tf.reduce_mean(target_features, axis=0)
# original = bottleneck_model(cur_simg_input)
# original_center = tf.reduce_mean(original, axis=0)
# direction = target_center - original_center
# final_target = original + self.ratio * direction
# return final_target
self.bottlesim = 0.0
self.bottlesim_sum = 0.0
self.bottlesim_push = 0.0
for bottleneck_model in bottleneck_model_ls:
model_input_shape = bottleneck_model.input_shape[1:]
cur_aimg_input = resize_tensor(self.aimg_input, model_input_shape)
self.bottleneck_a = bottleneck_model(cur_aimg_input)
if self.MIMIC_IMG:
# cur_timg_input = resize_tensor(self.timg_input, model_input_shape)
# cur_simg_input = resize_tensor(self.simg_input, model_input_shape)
cur_timg_input = self.timg_input
cur_simg_input = self.simg_input
self.bottleneck_t = calculate_direction(bottleneck_model, cur_timg_input, cur_simg_input)
# self.bottleneck_t = bottleneck_model(cur_timg_input)
else:
self.bottleneck_t = self.bottleneck_t_raw
bottleneck_diff = self.bottleneck_t - self.bottleneck_a
scale_factor = tf.sqrt(tf.reduce_sum(tf.square(self.bottleneck_t), axis=1))
cur_bottlesim = tf.sqrt(tf.reduce_sum(tf.square(bottleneck_diff), axis=1))
cur_bottlesim = cur_bottlesim / scale_factor
cur_bottlesim_sum = tf.reduce_sum(cur_bottlesim)
self.bottlesim += cur_bottlesim
# self.bottlesim_push += cur_bottlesim_push_sum
self.bottlesim_sum += cur_bottlesim_sum
# sum up the losses
if self.maximize:
self.loss = self.const * tf.square(self.dist) - self.bottlesim
else:
self.loss = self.const * tf.square(self.dist) + self.bottlesim
self.loss_sum = tf.reduce_sum(tf.where(self.mask, self.loss, tf.zeros_like(self.loss)))
# Setup the Adadelta optimizer and keep track of variables
# we're creating
start_vars = set(x.name for x in tf.global_variables())
self.learning_rate_holder = tf.placeholder(tf.float32, shape=[])
# optimizer = tf.train.AdadeltaOptimizer(self.learning_rate_holder)
optimizer = tf.train.AdamOptimizer(self.learning_rate_holder)
self.train = optimizer.minimize(self.loss_sum,
var_list=[self.modifier])
end_vars = tf.global_variables()
new_vars = [x for x in end_vars if x.name not in start_vars]
# these are the variables to initialize when we run
self.setup = []
self.setup.append(self.modifier.assign(self.assign_modifier))
if self.MIMIC_IMG:
self.setup.append(self.timg_tanh.assign(self.assign_timg_tanh))
else:
self.setup.append(self.bottleneck_t_raw.assign(
self.assign_bottleneck_t_raw))
self.setup.append(self.simg_tanh.assign(self.assign_simg_tanh))
self.setup.append(self.const.assign(self.assign_const))
self.setup.append(self.mask.assign(self.assign_mask))
self.setup.append(self.weights.assign(self.assign_weights))
self.init = tf.variables_initializer(var_list=[self.modifier] + new_vars)
print('Attacker loaded')
def preprocess_arctanh(self, imgs):
imgs = reverse_preprocess(imgs, self.intensity_range)
imgs /= 255.0
imgs -= 0.5
imgs *= self.tanh_constant
tanh_imgs = np.arctanh(imgs)
return tanh_imgs
def clipping(self, imgs):
imgs = reverse_preprocess(imgs, self.intensity_range)
imgs = np.clip(imgs, 0, self.max_val)
imgs = np.rint(imgs)
imgs = preprocess(imgs, self.intensity_range)
return imgs
def attack(self, source_imgs, target_imgs, weights=None):
if weights is None:
weights = np.ones([source_imgs.shape[0]] +
list(self.bottleneck_shape[1:]))
assert weights.shape[1:] == self.bottleneck_shape[1:]
assert source_imgs.shape[1:] == self.input_shape[1:]
assert source_imgs.shape[0] == weights.shape[0]
if self.MIMIC_IMG:
assert target_imgs.shape[1:] == self.input_shape[1:]
assert source_imgs.shape[0] == target_imgs.shape[0]
else:
assert target_imgs.shape[1:] == self.bottleneck_shape[1:]
assert source_imgs.shape[0] == target_imgs.shape[0]
start_time = time.time()
adv_imgs = []
print('%d batches in total'
% int(np.ceil(len(source_imgs) / self.batch_size)))
for idx in range(0, len(source_imgs), self.batch_size):
print('processing batch %d at %s' % (idx, datetime.datetime.now()))
adv_img = self.attack_batch(source_imgs[idx:idx + self.batch_size],
target_imgs[idx:idx + self.batch_size],
weights[idx:idx + self.batch_size])
adv_imgs.extend(adv_img)
elapsed_time = time.time() - start_time
print('attack cost %f s' % (elapsed_time))
return np.array(adv_imgs)
def attack_batch(self, source_imgs, target_imgs, weights):
"""
Run the attack on a batch of images and labels.
"""
LR = self.learning_rate
nb_imgs = source_imgs.shape[0]
mask = [True] * nb_imgs + [False] * (self.batch_size - nb_imgs)
mask = np.array(mask, dtype=np.bool)
source_imgs = np.array(source_imgs)
target_imgs = np.array(target_imgs)
# convert to tanh-space
simg_tanh = self.preprocess_arctanh(source_imgs)
if self.MIMIC_IMG:
timg_tanh = self.preprocess_arctanh(target_imgs)
else:
timg_tanh = target_imgs
CONST = np.ones(self.batch_size) * self.initial_const
self.sess.run(self.init)
simg_tanh_batch = np.zeros(self.input_shape)
if self.MIMIC_IMG:
timg_tanh_batch = np.zeros(self.input_shape)
else:
timg_tanh_batch = np.zeros(self.bottleneck_shape)
weights_batch = np.zeros(self.bottleneck_shape)
simg_tanh_batch[:nb_imgs] = simg_tanh[:nb_imgs]
timg_tanh_batch[:nb_imgs] = timg_tanh[:nb_imgs]
weights_batch[:nb_imgs] = weights[:nb_imgs]
modifier_batch = np.ones(self.input_shape) * 1e-6
# set the variables so that we don't have to send them over again
if self.MIMIC_IMG:
self.sess.run(self.setup,
{self.assign_timg_tanh: timg_tanh_batch,
self.assign_simg_tanh: simg_tanh_batch,
self.assign_const: CONST,
self.assign_mask: mask,
self.assign_weights: weights_batch,
self.assign_modifier: modifier_batch})
else:
# if directly mimicking a vector, use assign_bottleneck_t_raw
# in setup
self.sess.run(self.setup,
{self.assign_bottleneck_t_raw: timg_tanh_batch,
self.assign_simg_tanh: simg_tanh_batch,
self.assign_const: CONST,
self.assign_mask: mask,
self.assign_weights: weights_batch,
self.assign_modifier: modifier_batch})
best_bottlesim = [0] * nb_imgs if self.maximize else [np.inf] * nb_imgs
best_adv = np.zeros_like(source_imgs)
if self.verbose == 1:
loss_sum = float(self.sess.run(self.loss_sum))
dist_sum = float(self.sess.run(self.dist_sum))
thresh_over = (dist_sum / self.batch_size / self.l_threshold * 100)
dist_raw_sum = float(self.sess.run(self.dist_raw_sum))
bottlesim_sum = self.sess.run(self.bottlesim_sum)
print('START: Total loss: %.4E; perturb: %.6f (%.2f%% over, raw: %.6f); sim: %f'
% (Decimal(loss_sum),
dist_sum,
thresh_over,
dist_raw_sum,
bottlesim_sum / nb_imgs))
try:
total_distance = [0] * nb_imgs
if self.limit_dist:
dist_raw_list, bottlesim_list, aimg_input_list = self.sess.run(
[self.dist_raw,
self.bottlesim,
self.aimg_input])
for e, (dist_raw, bottlesim, aimg_input) in enumerate(
zip(dist_raw_list, bottlesim_list, aimg_input_list)):
if e >= nb_imgs:
break
total_distance[e] = bottlesim
for iteration in range(self.MAX_ITERATIONS):
self.sess.run([self.train], feed_dict={self.learning_rate_holder: LR})
dist_raw_list, bottlesim_list, aimg_input_list = self.sess.run(
[self.dist_raw,
self.bottlesim,
self.aimg_input])
for e, (dist_raw, bottlesim, aimg_input) in enumerate(
zip(dist_raw_list, bottlesim_list, aimg_input_list)):
if e >= nb_imgs:
break
if (bottlesim < best_bottlesim[e] and bottlesim > total_distance[e] * 0.1 and (
not self.maximize)) or (
bottlesim > best_bottlesim[e] and self.maximize):
best_bottlesim[e] = bottlesim
best_adv[e] = aimg_input
if iteration != 0 and iteration % (self.MAX_ITERATIONS // 3) == 0:
LR = LR / 2
print("Learning Rate: ", LR)
if iteration % (self.MAX_ITERATIONS // 10) == 0:
if self.verbose == 1:
loss_sum = float(self.sess.run(self.loss_sum))
dist_sum = float(self.sess.run(self.dist_sum))
thresh_over = (dist_sum /
self.batch_size /
self.l_threshold *
100)
dist_raw_sum = float(self.sess.run(self.dist_raw_sum))
bottlesim_sum = self.sess.run(self.bottlesim_sum)
print('ITER %4d: Total loss: %.4E; perturb: %.6f (%.2f%% over, raw: %.6f); sim: %f'
% (iteration,
Decimal(loss_sum),
dist_sum,
thresh_over,
dist_raw_sum,
bottlesim_sum / nb_imgs))
except KeyboardInterrupt:
pass
if self.verbose == 1:
loss_sum = float(self.sess.run(self.loss_sum))
dist_sum = float(self.sess.run(self.dist_sum))
thresh_over = (dist_sum / self.batch_size / self.l_threshold * 100)
dist_raw_sum = float(self.sess.run(self.dist_raw_sum))
bottlesim_sum = float(self.sess.run(self.bottlesim_sum))
print('END: Total loss: %.4E; perturb: %.6f (%.2f%% over, raw: %.6f); sim: %f'
% (Decimal(loss_sum),
dist_sum,
thresh_over,
dist_raw_sum,
bottlesim_sum / nb_imgs))
best_adv = self.clipping(best_adv[:nb_imgs])
return best_adv
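
Taken together, the class above reduces to a small API surface. A minimal usage sketch (extractor name, session helpers, and the zero-filled image arrays are placeholders; constructor keywords follow the listing above):

```
import numpy as np
from utils import init_gpu, load_extractor
from differentiator import FawkesMaskGeneration

sess = init_gpu("0")
extractors = [load_extractor("webface_dense_robust_extract")]  # placeholder model name

protector = FawkesMaskGeneration(sess, extractors,
                                 batch_size=2,
                                 mimic_img=True,
                                 intensity_range='imagenet',
                                 l_threshold=0.03,
                                 image_shape=(224, 224, 3))

# source_imgs / target_imgs: float32 arrays of shape (N, 224, 224, 3),
# already preprocessed into the extractor's 'imagenet' input space.
source_imgs = np.zeros((2, 224, 224, 3), dtype=np.float32)
target_imgs = np.zeros((2, 224, 224, 3), dtype=np.float32)
cloaked = protector.attack(source_imgs, target_imgs)
```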

fawkes_dev/protection.py (new file)

@@ -0,0 +1,95 @@
import argparse
import os
import pickle
import random
import sys
import numpy as np
from differentiator import FawkesMaskGeneration
from tensorflow import set_random_seed
from utils import load_extractor, CloakData, init_gpu
random.seed(12243)
np.random.seed(122412)
set_random_seed(12242)
NUM_IMG_PROTECTED = 32 # Number of images used to optimize the target class
BATCH_SIZE = 32
MAX_ITER = 1000
def diff_protected_data(sess, feature_extractors_ls, image_X, number_protect, target_X=None, th=0.01):
image_X = image_X[:number_protect]
differentiator = FawkesMaskGeneration(sess, feature_extractors_ls,
batch_size=BATCH_SIZE,
mimic_img=True,
intensity_range='imagenet',
initial_const=args.sd,
learning_rate=args.lr,
max_iterations=MAX_ITER,
l_threshold=th,
verbose=1, maximize=False, keep_final=False, image_shape=image_X.shape[1:])
if len(target_X) < len(image_X):
target_X = np.concatenate([target_X, target_X, target_X])
target_X = target_X[:len(image_X)]
cloaked_image_X = differentiator.attack(image_X, target_X)
return cloaked_image_X
def perform_defense():
RES = {}
sess = init_gpu(args.gpu)
FEATURE_EXTRACTORS = [args.feature_extractor]
RES_DIR = '../results/'
RES['num_img_protected'] = NUM_IMG_PROTECTED
RES['extractors'] = FEATURE_EXTRACTORS
num_protect = NUM_IMG_PROTECTED
print("Loading {} for optimization".format(args.feature_extractor))
feature_extractors_ls = [load_extractor(name) for name in FEATURE_EXTRACTORS]
protect_class = args.protect_class
cloak_data = CloakData(args.dataset, protect_class=protect_class)
RES_FILE_NAME = "{}_{}_protect{}".format(args.dataset, args.feature_extractor, cloak_data.protect_class)
RES_FILE_NAME = os.path.join(RES_DIR, RES_FILE_NAME)
print("Protect Class: ", cloak_data.protect_class)
cloak_data.target_path, cloak_data.target_data = cloak_data.select_target_label(feature_extractors_ls,
FEATURE_EXTRACTORS)
os.makedirs(RES_DIR, exist_ok=True)
os.makedirs(RES_FILE_NAME, exist_ok=True)
cloak_image_X = diff_protected_data(sess, feature_extractors_ls, cloak_data.protect_train_X,
number_protect=num_protect,
target_X=cloak_data.target_data, th=args.th)
cloak_data.cloaked_protect_train_X = cloak_image_X
RES['cloak_data'] = cloak_data
pickle.dump(RES, open(os.path.join(RES_FILE_NAME, 'cloak_data.p'), "wb"))
def parse_arguments(argv):
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', type=str,
help='GPU id', default='0')
parser.add_argument('--dataset', type=str,
help='name of dataset', default='scrub')
parser.add_argument('--feature-extractor', type=str,
help="name of the feature extractor used for optimization",
default="webface_dense_robust_extract")
parser.add_argument('--th', type=float, default=0.007)
parser.add_argument('--sd', type=int, default=1e5)
parser.add_argument('--protect_class', type=str, default=None)
parser.add_argument('--lr', type=float, default=0.1)
return parser.parse_args(argv)
if __name__ == '__main__':
args = parse_arguments(sys.argv[1:])
perform_defense()
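
One detail shared by both differentiator versions is the tanh-space parametrization in `preprocess_arctanh`: the optimizer works in an unbounded space whose decoded pixels can never leave [0, 255], so no per-step clipping is needed. A round-trip sketch in plain NumPy (the `2 - 1e-6` constant matches `self.tanh_constant` above):

```
import numpy as np

tanh_constant = 2 - 1e-6
x = np.array([0.0, 64.0, 255.0])                     # raw pixel values

t = np.arctanh((x / 255.0 - 0.5) * tanh_constant)    # into unbounded tanh space
x_back = (np.tanh(t) / tanh_constant + 0.5) * 255.0  # back to raw pixels

assert np.allclose(x, x_back)
```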

fawkes_dev/utils.py (new file)

@@ -0,0 +1,372 @@
import json
import os
import pickle
import random
import keras
import keras.backend as K
import numpy as np
import tensorflow as tf
from keras.applications.vgg16 import preprocess_input
from keras.layers import Dense, Activation
from keras.models import Model
from keras.preprocessing import image
from keras.utils import to_categorical
from sklearn.metrics import pairwise_distances
def clip_img(X, preprocessing='raw'):
X = reverse_preprocess(X, preprocessing)
X = np.clip(X, 0.0, 255.0)
X = preprocess(X, preprocessing)
return X
def dump_dictionary_as_json(dict, outfile):
j = json.dumps(dict)
with open(outfile, "wb") as f:
f.write(j.encode())
def fix_gpu_memory(mem_fraction=1):
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_fraction)
tf_config = tf.ConfigProto(gpu_options=gpu_options)
tf_config.gpu_options.allow_growth = True
tf_config.log_device_placement = False
init_op = tf.global_variables_initializer()
sess = tf.Session(config=tf_config)
sess.run(init_op)
K.set_session(sess)
return sess
def load_victim_model(number_classes, teacher_model=None, end2end=False):
for l in teacher_model.layers:
l.trainable = end2end
x = teacher_model.layers[-1].output
x = Dense(number_classes)(x)
x = Activation('softmax', name="act")(x)
model = Model(teacher_model.input, x)
opt = keras.optimizers.Adadelta()
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
return model
def init_gpu(gpu_index, force=False):
if isinstance(gpu_index, list):
gpu_num = ','.join([str(i) for i in gpu_index])
else:
gpu_num = str(gpu_index)
if "CUDA_VISIBLE_DEVICES" in os.environ and os.environ["CUDA_VISIBLE_DEVICES"] and not force:
print('GPU already initiated')
return
os.environ["CUDA_VISIBLE_DEVICES"] = gpu_num
sess = fix_gpu_memory()
return sess
def preprocess(X, method):
assert method in {'raw', 'imagenet', 'inception', 'mnist'}
    # compare strings by value, not identity
    if method == 'raw':
        pass
    elif method == 'imagenet':
X = imagenet_preprocessing(X)
else:
raise Exception('unknown method %s' % method)
return X
def reverse_preprocess(X, method):
assert method in {'raw', 'imagenet', 'inception', 'mnist'}
    if method == 'raw':
        pass
    elif method == 'imagenet':
X = imagenet_reverse_preprocessing(X)
else:
raise Exception('unknown method %s' % method)
return X
def imagenet_preprocessing(x, data_format=None):
if data_format is None:
data_format = K.image_data_format()
assert data_format in ('channels_last', 'channels_first')
x = np.array(x)
if data_format == 'channels_first':
# 'RGB'->'BGR'
if x.ndim == 3:
x = x[::-1, ...]
else:
x = x[:, ::-1, ...]
else:
# 'RGB'->'BGR'
x = x[..., ::-1]
mean = [103.939, 116.779, 123.68]
std = None
# Zero-center by mean pixel
if data_format == 'channels_first':
if x.ndim == 3:
x[0, :, :] -= mean[0]
x[1, :, :] -= mean[1]
x[2, :, :] -= mean[2]
if std is not None:
x[0, :, :] /= std[0]
x[1, :, :] /= std[1]
x[2, :, :] /= std[2]
else:
x[:, 0, :, :] -= mean[0]
x[:, 1, :, :] -= mean[1]
x[:, 2, :, :] -= mean[2]
if std is not None:
x[:, 0, :, :] /= std[0]
x[:, 1, :, :] /= std[1]
x[:, 2, :, :] /= std[2]
else:
x[..., 0] -= mean[0]
x[..., 1] -= mean[1]
x[..., 2] -= mean[2]
if std is not None:
x[..., 0] /= std[0]
x[..., 1] /= std[1]
x[..., 2] /= std[2]
return x
def imagenet_reverse_preprocessing(x, data_format=None):
import keras.backend as K
x = np.array(x)
if data_format is None:
data_format = K.image_data_format()
assert data_format in ('channels_last', 'channels_first')
if data_format == 'channels_first':
if x.ndim == 3:
# Zero-center by mean pixel
x[0, :, :] += 103.939
x[1, :, :] += 116.779
x[2, :, :] += 123.68
# 'BGR'->'RGB'
x = x[::-1, :, :]
else:
x[:, 0, :, :] += 103.939
x[:, 1, :, :] += 116.779
x[:, 2, :, :] += 123.68
x = x[:, ::-1, :, :]
else:
# Zero-center by mean pixel
x[..., 0] += 103.939
x[..., 1] += 116.779
x[..., 2] += 123.68
# 'BGR'->'RGB'
x = x[..., ::-1]
return x
def build_bottleneck_model(model, cut_off):
bottleneck_model = Model(model.input, model.get_layer(cut_off).output)
bottleneck_model.compile(loss='categorical_crossentropy',
optimizer='adam',
metrics=['accuracy'])
return bottleneck_model
def load_extractor(name):
model = keras.models.load_model("../feature_extractors/{}.h5".format(name))
    # layer.activation is a function object, so compare against the function
    if hasattr(model.layers[-1], "activation") and model.layers[-1].activation == keras.activations.softmax:
        raise Exception(
            "Given extractor's last layer is softmax; remove the top layers to make it a feature extractor")
# if "extract" in name.split("/")[-1]:
# pass
# else:
# print("Convert a model to a feature extractor")
# model = build_bottleneck_model(model, model.layers[layer_idx].name)
# model.save(name + "extract")
# model = keras.models.load_model(name + "extract")
return model
def get_dataset_path(dataset):
if not os.path.exists("config.json"):
raise Exception("Please config the datasets before running protection code. See more in README and config.py.")
config = json.load(open("config.json", 'r'))
if dataset not in config:
raise Exception(
"Dataset {} does not exist, please download to data/ and add the path to this function... Abort".format(
dataset))
return config[dataset]['train_dir'], config[dataset]['test_dir'], config[dataset]['num_classes'], config[dataset][
'num_images']
def normalize(x):
return x / np.linalg.norm(x, axis=1, keepdims=True)
class CloakData(object):
def __init__(self, dataset, img_shape=(224, 224), protect_class=None):
self.dataset = dataset
self.img_shape = img_shape
self.train_data_dir, self.test_data_dir, self.number_classes, self.number_samples = get_dataset_path(dataset)
self.all_labels = sorted(list(os.listdir(self.train_data_dir)))
if protect_class:
self.protect_class = protect_class
else:
self.protect_class = random.choice(self.all_labels)
self.sybil_class = random.choice([l for l in self.all_labels if l != self.protect_class])
self.protect_train_X, self.protect_test_X = self.load_label_data(self.protect_class)
self.sybil_train_X, self.sybil_test_X = self.load_label_data(self.sybil_class)
self.cloaked_protect_train_X = None
self.cloaked_sybil_train_X = None
self.label2path_train, self.label2path_test, self.path2idx = self.build_data_mapping()
self.all_training_path = self.get_all_data_path(self.label2path_train)
self.all_test_path = self.get_all_data_path(self.label2path_test)
self.protect_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.protect_class))
self.sybil_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.sybil_class))
print("Find {} protect images".format(len(self.protect_class_path)))
def get_class_image_files(self, path):
return [os.path.join(path, f) for f in os.listdir(path)]
def extractor_ls_predict(self, feature_extractors_ls, X):
feature_ls = []
for extractor in feature_extractors_ls:
cur_features = extractor.predict(X)
feature_ls.append(cur_features)
concated_feature_ls = np.concatenate(feature_ls, axis=1)
concated_feature_ls = normalize(concated_feature_ls)
return concated_feature_ls
def load_embeddings(self, feature_extractors_names):
dictionaries = []
for extractor_name in feature_extractors_names:
path2emb = pickle.load(open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb"))
dictionaries.append(path2emb)
merge_dict = {}
for k in dictionaries[0].keys():
cur_emb = [dic[k] for dic in dictionaries]
merge_dict[k] = np.concatenate(cur_emb)
return merge_dict
def select_target_label(self, feature_extractors_ls, feature_extractors_names, metric='l2'):
original_feature_x = self.extractor_ls_predict(feature_extractors_ls, self.protect_train_X)
path2emb = self.load_embeddings(feature_extractors_names)
items = list(path2emb.items())
paths = [p[0] for p in items]
embs = [p[1] for p in items]
embs = np.array(embs)
pair_dist = pairwise_distances(original_feature_x, embs, metric)
max_sum = np.min(pair_dist, axis=0)
sorted_idx = np.argsort(max_sum)[::-1]
highest_num = 0
paired_target_X = None
final_target_class_path = None
for idx in sorted_idx[:5]:
target_class_path = paths[idx]
cur_target_X = self.load_dir(target_class_path)
cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X])
cur_tot_sum, cur_paired_target_X = self.calculate_dist_score(self.protect_train_X, cur_target_X,
feature_extractors_ls,
metric=metric)
if cur_tot_sum > highest_num:
highest_num = cur_tot_sum
paired_target_X = cur_paired_target_X
final_target_class_path = target_class_path
np.random.shuffle(paired_target_X)
return final_target_class_path, paired_target_X
def calculate_dist_score(self, a, b, feature_extractors_ls, metric='l2'):
features1 = self.extractor_ls_predict(feature_extractors_ls, a)
features2 = self.extractor_ls_predict(feature_extractors_ls, b)
pair_cos = pairwise_distances(features1, features2, metric)
max_sum = np.min(pair_cos, axis=0)
max_sum_arg = np.argsort(max_sum)[::-1]
max_sum_arg = max_sum_arg[:len(a)]
max_sum = [max_sum[i] for i in max_sum_arg]
paired_target_X = [b[j] for j in max_sum_arg]
paired_target_X = np.array(paired_target_X)
return np.min(max_sum), paired_target_X
def get_all_data_path(self, label2path):
all_paths = []
for k, v in label2path.items():
cur_all_paths = [os.path.join(k, cur_p) for cur_p in v]
all_paths.extend(cur_all_paths)
return all_paths
def load_label_data(self, label):
train_label_path = os.path.join(self.train_data_dir, label)
test_label_path = os.path.join(self.test_data_dir, label)
train_X = self.load_dir(train_label_path)
test_X = self.load_dir(test_label_path)
return train_X, test_X
def load_dir(self, path):
assert os.path.exists(path)
x_ls = []
for file in os.listdir(path):
cur_path = os.path.join(path, file)
im = image.load_img(cur_path, target_size=self.img_shape)
im = image.img_to_array(im)
x_ls.append(im)
raw_x = np.array(x_ls)
return preprocess_input(raw_x)
def build_data_mapping(self):
label2path_train = {}
label2path_test = {}
idx = 0
path2idx = {}
for label_name in self.all_labels:
full_path_train = os.path.join(self.train_data_dir, label_name)
full_path_test = os.path.join(self.test_data_dir, label_name)
label2path_train[full_path_train] = list(os.listdir(full_path_train))
label2path_test[full_path_test] = list(os.listdir(full_path_test))
for img_file in os.listdir(full_path_train):
path2idx[os.path.join(full_path_train, img_file)] = idx
for img_file in os.listdir(full_path_test):
path2idx[os.path.join(full_path_test, img_file)] = idx
idx += 1
return label2path_train, label2path_test, path2idx
def generate_data_post_cloak(self, sybil=False):
assert self.cloaked_protect_train_X is not None
while True:
batch_X = []
batch_Y = []
cur_batch_path = random.sample(self.all_training_path, 32)
for p in cur_batch_path:
cur_y = self.path2idx[p]
if p in self.protect_class_path:
cur_x = random.choice(self.cloaked_protect_train_X)
                elif sybil and (p in self.sybil_class_path):
cur_x = random.choice(self.cloaked_sybil_train_X)
else:
im = image.load_img(p, target_size=self.img_shape)
im = image.img_to_array(im)
cur_x = preprocess_input(im)
batch_X.append(cur_x)
batch_Y.append(cur_y)
batch_X = np.array(batch_X)
batch_Y = to_categorical(np.array(batch_Y), num_classes=self.number_classes)
yield batch_X, batch_Y
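
The generator above is meant to be fed straight into Keras. A hypothetical training sketch (dataset name, the `extractor` model, and the cloaked array are placeholders; `fit_generator` is the Keras 2.x API used with the other helpers in this file):

```
import numpy as np

cloak_data = CloakData("scrub", protect_class=None)          # requires config.json and data dirs
cloak_data.cloaked_protect_train_X = np.load("cloaked.npy")  # placeholder cloaked images

# extractor: a previously loaded feature extractor used as the teacher model
tracker = load_victim_model(cloak_data.number_classes, teacher_model=extractor)
tracker.fit_generator(cloak_data.generate_data_post_cloak(),
                      steps_per_epoch=len(cloak_data.all_training_path) // 32,
                      epochs=10)
```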