2
0
mirror of https://github.com/Shawn-Shan/fawkes.git synced 2024-12-22 07:09:33 +05:30

add files

Former-commit-id: 8999421c9e34ca25384a3adcb879f4f5afdedda4 [formerly 0b6e5924e22331500a9279183550e81146029249]
Former-commit-id: 8bafbec9710aa552ecd5dbf010089eee435ae9c9
This commit is contained in:
Shawn-Shan 2020-07-02 12:32:46 -05:00
parent d7a25eb292
commit b1e7b67055
5 changed files with 600 additions and 300 deletions

View File

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
# @Date : 2020-07-01
# @Author : Shawn Shan (shansixiong@cs.uchicago.edu)
# @Link : https://www.shawnshan.com/
__version__ = '0.0.2'
from .differentiator import FawkesMaskGeneration
from .utils import load_extractor, init_gpu, select_target_label, dump_image, reverse_process_cloaked, \
Faces
from .protection import main
import logging
import sys
import os
# Mark the 'tensorflow' logger as disabled so records sent to it are dropped
# once this package has been imported.
logging.getLogger('tensorflow').disabled = True
# Public API of the package: the names exported by a star-import.
__all__ = (
'__version__',
'FawkesMaskGeneration', 'load_extractor',
'init_gpu',
'select_target_label', 'dump_image', 'reverse_process_cloaked', 'Faces', 'main'
)

View File

@ -10,7 +10,7 @@ from decimal import Decimal
import numpy as np
import tensorflow as tf
from utils import preprocess, reverse_preprocess
from .utils import preprocess, reverse_preprocess
class FawkesMaskGeneration:
@ -47,7 +47,7 @@ class FawkesMaskGeneration:
max_iterations=MAX_ITERATIONS, initial_const=INITIAL_CONST,
intensity_range=INTENSITY_RANGE, l_threshold=L_THRESHOLD,
max_val=MAX_VAL, keep_final=KEEP_FINAL, maximize=MAXIMIZE, image_shape=IMAGE_SHAPE,
verbose=0, ratio=RATIO, limit_dist=LIMIT_DIST):
verbose=0, ratio=RATIO, limit_dist=LIMIT_DIST, faces=None):
assert intensity_range in {'raw', 'imagenet', 'inception', 'mnist'}
@ -69,10 +69,12 @@ class FawkesMaskGeneration:
self.ratio = ratio
self.limit_dist = limit_dist
self.single_shape = list(image_shape)
self.faces = faces
self.input_shape = tuple([self.batch_size] + self.single_shape)
self.bottleneck_shape = tuple([self.batch_size] + self.single_shape)
# self.bottleneck_shape = tuple([self.batch_size, bottleneck_model_ls[0].output_shape[-1]])
# the variable we're going to optimize over
self.modifier = tf.Variable(np.zeros(self.input_shape, dtype=np.float32))
@ -149,8 +151,6 @@ class FawkesMaskGeneration:
self.dist_raw,
tf.zeros_like(self.dist_raw)))
self.dist_sum = tf.reduce_sum(tf.where(self.mask, self.dist, tf.zeros_like(self.dist)))
# self.dist_sum = 1e-5 * tf.reduce_sum(self.dist)
# self.dist_raw_sum = self.dist_sum
def resize_tensor(input_tensor, model_input_shape):
if input_tensor.shape[1:] == model_input_shape or model_input_shape[1] is None:
@ -171,16 +171,14 @@ class FawkesMaskGeneration:
self.bottleneck_a = bottleneck_model(cur_aimg_input)
if self.MIMIC_IMG:
# cur_timg_input = resize_tensor(self.timg_input, model_input_shape)
# cur_simg_input = resize_tensor(self.simg_input, model_input_shape)
cur_timg_input = self.timg_input
cur_simg_input = self.simg_input
self.bottleneck_t = calculate_direction(bottleneck_model, cur_timg_input, cur_simg_input)
# self.bottleneck_t = bottleneck_model(cur_timg_input)
else:
self.bottleneck_t = self.bottleneck_t_raw
bottleneck_diff = self.bottleneck_t - self.bottleneck_a
scale_factor = tf.sqrt(tf.reduce_sum(tf.square(self.bottleneck_t), axis=1))
cur_bottlesim = tf.sqrt(tf.reduce_sum(tf.square(bottleneck_diff), axis=1))
@ -189,7 +187,6 @@ class FawkesMaskGeneration:
self.bottlesim += cur_bottlesim
# self.bottlesim_push += cur_bottlesim_push_sum
self.bottlesim_sum += cur_bottlesim_sum
# sum up the losses
@ -202,20 +199,13 @@ class FawkesMaskGeneration:
self.loss,
tf.zeros_like(self.loss)))
# self.loss_sum = self.dist_sum + tf.reduce_sum(self.bottlesim)
# import pdb
# pdb.set_trace()
# self.loss_sum = tf.reduce_sum(tf.where(self.mask, self.loss, tf.zeros_like(self.loss)))
# Setup the Adadelta optimizer and keep track of variables
# we're creating
start_vars = set(x.name for x in tf.global_variables())
self.learning_rate_holder = tf.placeholder(tf.float32, shape=[])
optimizer = tf.train.AdadeltaOptimizer(self.learning_rate_holder)
# optimizer = tf.train.AdamOptimizer(self.learning_rate_holder)
self.train = optimizer.minimize(self.loss_sum,
var_list=[self.modifier])
self.train = optimizer.minimize(self.loss_sum, var_list=[self.modifier])
end_vars = tf.global_variables()
new_vars = [x for x in end_vars if x.name not in start_vars]
@ -297,6 +287,7 @@ class FawkesMaskGeneration:
LR = self.learning_rate
nb_imgs = source_imgs.shape[0]
mask = [True] * nb_imgs + [False] * (self.batch_size - nb_imgs)
# mask = [True] * self.batch_size
mask = np.array(mask, dtype=np.bool)
source_imgs = np.array(source_imgs)
@ -317,19 +308,34 @@ class FawkesMaskGeneration:
timg_tanh_batch = np.zeros(self.input_shape)
else:
timg_tanh_batch = np.zeros(self.bottleneck_shape)
weights_batch = np.zeros(self.bottleneck_shape)
simg_tanh_batch[:nb_imgs] = simg_tanh[:nb_imgs]
timg_tanh_batch[:nb_imgs] = timg_tanh[:nb_imgs]
weights_batch[:nb_imgs] = weights[:nb_imgs]
modifier_batch = np.ones(self.input_shape) * 1e-6
self.sess.run(self.setup,
{self.assign_timg_tanh: timg_tanh_batch,
self.assign_simg_tanh: simg_tanh_batch,
self.assign_const: CONST,
self.assign_mask: mask,
self.assign_weights: weights_batch,
self.assign_modifier: modifier_batch})
temp_images = []
# set the variables so that we don't have to send them over again
if self.MIMIC_IMG:
self.sess.run(self.setup,
{self.assign_timg_tanh: timg_tanh_batch,
self.assign_simg_tanh: simg_tanh_batch,
self.assign_const: CONST,
self.assign_mask: mask,
self.assign_weights: weights_batch,
self.assign_modifier: modifier_batch})
else:
# if directly mimicking a vector, use assign_bottleneck_t_raw
# in setup
self.sess.run(self.setup,
{self.assign_bottleneck_t_raw: timg_tanh_batch,
self.assign_simg_tanh: simg_tanh_batch,
self.assign_const: CONST,
self.assign_mask: mask,
self.assign_weights: weights_batch,
self.assign_modifier: modifier_batch})
best_bottlesim = [0] * nb_imgs if self.maximize else [np.inf] * nb_imgs
best_adv = np.zeros_like(source_imgs)
@ -347,6 +353,7 @@ class FawkesMaskGeneration:
dist_raw_sum,
bottlesim_sum / nb_imgs))
finished_idx = set()
try:
total_distance = [0] * nb_imgs
@ -369,8 +376,14 @@ class FawkesMaskGeneration:
[self.dist_raw,
self.bottlesim,
self.aimg_input])
all_clear = True
for e, (dist_raw, bottlesim, aimg_input) in enumerate(
zip(dist_raw_list, bottlesim_list, aimg_input_list)):
if e in finished_idx:
continue
if e >= nb_imgs:
break
if (bottlesim < best_bottlesim[e] and bottlesim > total_distance[e] * 0.1 and (
@ -379,40 +392,55 @@ class FawkesMaskGeneration:
best_bottlesim[e] = bottlesim
best_adv[e] = aimg_input
if iteration != 0 and iteration % (self.MAX_ITERATIONS // 3) == 0:
# LR = LR / 2
# if iteration > 20 and (dist_raw >= self.l_threshold or iteration == self.MAX_ITERATIONS - 1):
# finished_idx.add(e)
# print("{} finished at dist {}".format(e, dist_raw))
# best_bottlesim[e] = bottlesim
# best_adv[e] = aimg_input
#
all_clear = False
if all_clear:
break
if iteration != 0 and iteration % (self.MAX_ITERATIONS // 2) == 0:
LR = LR / 2
print("Learning Rate: ", LR)
if iteration % (self.MAX_ITERATIONS // 10) == 0:
if iteration % (self.MAX_ITERATIONS // 5) == 0:
if self.verbose == 1:
loss_sum = float(self.sess.run(self.loss_sum))
dist_sum = float(self.sess.run(self.dist_sum))
thresh_over = (dist_sum /
self.batch_size /
self.l_threshold *
100)
dist_raw_sum = float(self.sess.run(self.dist_raw_sum))
bottlesim_sum = self.sess.run(self.bottlesim_sum)
print('ITER %4d: Total loss: %.4E; perturb: %.6f (%.2f%% over, raw: %.6f); sim: %f'
% (iteration,
Decimal(loss_sum),
dist_sum,
thresh_over,
dist_raw_sum,
bottlesim_sum / nb_imgs))
print('ITER %4d perturb: %.5f; sim: %f'
% (iteration, dist_raw_sum / nb_imgs, bottlesim_sum / nb_imgs))
# protected_images = aimg_input_list
#
# orginal_images = np.copy(self.faces.cropped_faces)
# cloak_perturbation = reverse_process_cloaked(protected_images) - reverse_process_cloaked(
# orginal_images)
# final_images = self.faces.merge_faces(cloak_perturbation)
#
# for p_img, img in zip(protected_images, final_images):
# dump_image(reverse_process_cloaked(p_img),
# "/home/shansixioing/fawkes/data/emily/emily_cloaked_cropped{}.png".format(iteration),
# format='png')
#
# dump_image(img,
# "/home/shansixioing/fawkes/data/emily/emily_cloaked_{}.png".format(iteration),
# format='png')
except KeyboardInterrupt:
pass
if self.verbose == 1:
loss_sum = float(self.sess.run(self.loss_sum))
dist_sum = float(self.sess.run(self.dist_sum))
thresh_over = (dist_sum / self.batch_size / self.l_threshold * 100)
dist_raw_sum = float(self.sess.run(self.dist_raw_sum))
bottlesim_sum = float(self.sess.run(self.bottlesim_sum))
print('END: Total loss: %.4E; perturb: %.6f (%.2f%% over, raw: %.6f); sim: %f'
print('END: Total loss: %.4E; perturb: %.6f (raw: %.6f); sim: %f'
% (Decimal(loss_sum),
dist_sum,
thresh_over,
dist_raw_sum,
bottlesim_sum / nb_imgs))

View File

@ -1,3 +1,7 @@
# from __future__ import absolute_import
# from __future__ import division
# from __future__ import print_function
import argparse
import glob
import os
@ -5,106 +9,141 @@ import random
import sys
import numpy as np
from differentiator import FawkesMaskGeneration
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing import image
from skimage.transform import resize
from tensorflow import set_random_seed
from utils import load_extractor, init_gpu, select_target_label, dump_image, reverse_process_cloaked
from .differentiator import FawkesMaskGeneration
from .utils import load_extractor, init_gpu, select_target_label, dump_image, reverse_process_cloaked, \
Faces
random.seed(12243)
np.random.seed(122412)
set_random_seed(12242)
BATCH_SIZE = 1
MAX_ITER = 1000
BATCH_SIZE = 32
def generate_cloak_images(sess, feature_extractors, image_X, target_X=None, th=0.01):
def generate_cloak_images(sess, feature_extractors, image_X, target_emb=None, th=0.01, faces=None, sd=1e9, lr=2,
max_step=500):
batch_size = BATCH_SIZE if len(image_X) > BATCH_SIZE else len(image_X)
differentiator = FawkesMaskGeneration(sess, feature_extractors,
batch_size=batch_size,
mimic_img=True,
intensity_range='imagenet',
initial_const=args.sd,
learning_rate=args.lr,
max_iterations=MAX_ITER,
initial_const=sd,
learning_rate=lr,
max_iterations=max_step,
l_threshold=th,
verbose=1, maximize=False, keep_final=False, image_shape=image_X.shape[1:])
verbose=1, maximize=False, keep_final=False, image_shape=image_X.shape[1:],
faces=faces)
cloaked_image_X = differentiator.attack(image_X, target_X)
cloaked_image_X = differentiator.attack(image_X, target_emb)
return cloaked_image_X
def extract_faces(img):
    """Resize ``img`` to 224x224 and run ``preprocess_input`` on the result."""
    resized = resize(img, (224, 224))
    return preprocess_input(resized)
def check_imgs(imgs):
    """Validate the value range of ``imgs`` and bring it to the 0-255 scale.

    Args:
        imgs: numeric array of pixel values.

    Returns:
        ``imgs * 255.0`` when every value lies in [0, 1]; ``imgs`` itself
        (unchanged, same object) when every value lies in [0, 255].

    Raises:
        ValueError: if any value is negative or greater than 255 (or the
            range cannot be established, e.g. the data contains NaN).
    """
    # Compute the extremes once instead of once per branch.
    lowest = np.min(imgs)
    highest = np.max(imgs)

    if lowest >= 0 and highest <= 1:
        # Unit-range input: rescale to the 0-255 range.
        return imgs * 255.0
    if lowest >= 0 and highest <= 255:
        return imgs

    # ValueError is a subclass of Exception, so callers that caught the
    # previous bare Exception keep working.
    raise ValueError(
        "Image values must lie within [0, 1] or [0, 255]; got range [{}, {}]".format(lowest, highest))
def fawkes():
assert os.path.exists(args.directory)
assert os.path.isdir(args.directory)
def main(*argv):
if not argv:
argv = list(sys.argv)
# attach SIGPIPE handler to properly handle broken pipe
try: # sigpipe not available under windows. just ignore in this case
import signal
signal.signal(signal.SIGPIPE, signal.SIG_DFL)
except Exception as e:
pass
parser = argparse.ArgumentParser()
parser.add_argument('--directory', '-d', type=str,
help='directory that contain images for cloaking', default='imgs/')
parser.add_argument('--gpu', type=str,
help='GPU id', default='0')
parser.add_argument('--mode', type=str,
help='cloak generation mode', default='high')
parser.add_argument('--feature-extractor', type=str,
help="name of the feature extractor used for optimization",
default="high_extract")
parser.add_argument('--th', type=float, default=0.01)
parser.add_argument('--max-step', type=int, default=500)
parser.add_argument('--sd', type=int, default=1e9)
parser.add_argument('--lr', type=float, default=2)
parser.add_argument('--separate_target', action='store_true')
parser.add_argument('--format', type=str,
help="final image format",
default="jpg")
args = parser.parse_args(argv[1:])
if args.mode == 'low':
args.feature_extractor = "high_extract"
args.th = 0.003
elif args.mode == 'mid':
args.feature_extractor = "high_extract"
args.th = 0.005
elif args.mode == 'high':
args.feature_extractor = "high_extract"
args.th = 0.007
elif args.mode == 'ultra':
args.feature_extractor = "high_extract"
args.th = 0.01
elif args.mode == 'custom':
pass
else:
raise Exception("mode must be one of 'low', 'mid', 'high', 'ultra', 'custom'")
assert args.format in ['png', 'jpg', 'jpeg']
if args.format == 'jpg':
args.format = 'jpeg'
sess = init_gpu(args.gpu)
print("Loading {} for optimization".format(args.feature_extractor))
feature_extractors_ls = [load_extractor(args.feature_extractor)]
fs_names = [args.feature_extractor]
feature_extractors_ls = [load_extractor(name) for name in fs_names]
image_paths = glob.glob(os.path.join(args.directory, "*"))
image_paths = [path for path in image_paths if "_cloaked" not in path.split("/")[-1]]
if not image_paths:
print("No images in the directory")
exit(1)
orginal_images = [extract_faces(image.img_to_array(image.load_img(cur_path))) for cur_path in
image_paths]
faces = Faces(image_paths, sess)
orginal_images = faces.cropped_faces
orginal_images = np.array(orginal_images)
if args.seperate_target:
target_images = []
if args.separate_target:
target_embedding = []
for org_img in orginal_images:
org_img = org_img.reshape([1] + list(org_img.shape))
tar_img = select_target_label(org_img, feature_extractors_ls, [args.feature_extractor])
target_images.append(tar_img)
target_images = np.concatenate(target_images)
tar_emb = select_target_label(org_img, feature_extractors_ls, fs_names)
target_embedding.append(tar_emb)
target_embedding = np.concatenate(target_embedding)
else:
target_images = select_target_label(orginal_images, feature_extractors_ls, [args.feature_extractor])
# file_name = args.directory.split("/")[-1]
# os.makedirs(args.result_directory, exist_ok=True)
# os.makedirs(os.path.join(args.result_directory, file_name), exist_ok=True)
target_embedding = select_target_label(orginal_images, feature_extractors_ls, fs_names)
protected_images = generate_cloak_images(sess, feature_extractors_ls, orginal_images,
target_X=target_images, th=args.th)
target_emb=target_embedding, th=args.th, faces=faces, sd=args.sd,
lr=args.lr, max_step=args.max_step)
for p_img, path in zip(protected_images, image_paths):
p_img = reverse_process_cloaked(p_img)
file_name = "{}_cloaked.jpeg".format(".".join(path.split(".")[:-1]))
dump_image(p_img, file_name, format="JPEG")
faces.cloaked_cropped_faces = protected_images
cloak_perturbation = reverse_process_cloaked(protected_images) - reverse_process_cloaked(orginal_images)
final_images = faces.merge_faces(cloak_perturbation)
def parse_arguments(argv):
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', type=str,
help='GPU id', default='0')
parser.add_argument('--directory', type=str,
help='directory that contain images for cloaking', default='imgs/')
parser.add_argument('--feature-extractor', type=str,
help="name of the feature extractor used for optimization",
default="webface_dense_robust_extract")
parser.add_argument('--th', type=float, default=0.005)
parser.add_argument('--sd', type=int, default=1e9)
parser.add_argument('--protect_class', type=str, default=None)
parser.add_argument('--lr', type=float, default=1)
parser.add_argument('--result_directory', type=str, default="../results")
parser.add_argument('--seperate_target', action='store_true')
return parser.parse_args(argv)
for p_img, cloaked_img, path in zip(final_images, protected_images, image_paths):
file_name = "{}_{}_{}_cloaked.{}".format(".".join(path.split(".")[:-1]), args.mode, args.th, args.format)
dump_image(p_img, file_name, format=args.format)
if __name__ == '__main__':
args = parse_arguments(sys.argv[1:])
fawkes()
main(*sys.argv)

View File

@ -1,19 +1,30 @@
import glob
import gzip
import json
import os
import pickle
import random
import sys
stderr = sys.stderr
sys.stderr = open(os.devnull, 'w')
import keras
sys.stderr = stderr
import keras.backend as K
import numpy as np
import tensorflow as tf
from keras.applications.vgg16 import preprocess_input
from PIL import Image, ExifTags
# from keras.applications.vgg16 import preprocess_input
from keras.layers import Dense, Activation
from keras.models import Model
from keras.preprocessing import image
from keras.utils import to_categorical
from keras.utils import get_file
from skimage.transform import resize
from sklearn.metrics import pairwise_distances
from .align_face import align, aligner
def clip_img(X, preprocessing='raw'):
X = reverse_preprocess(X, preprocessing)
@ -22,6 +33,91 @@ def clip_img(X, preprocessing='raw'):
return X
def load_image(path):
    """Load the image at ``path`` as an RGB array, applying its EXIF orientation.

    Args:
        path: location of the image file, passed to ``Image.open``.

    Returns:
        The result of ``image.img_to_array`` on the (possibly rotated) image
        after conversion to RGB.
    """
    img = Image.open(path)

    # ``_getexif`` is a private method that not every image class provides;
    # calling it unconditionally raises AttributeError for those images.
    # Treat a missing method the same as "no EXIF data". It is also read only
    # once here instead of twice.
    read_exif = getattr(img, '_getexif', None)
    exif = read_exif() if read_exif is not None else None

    if exif is not None:
        # Find the numeric tag id whose EXIF name is 'Orientation'.
        orientation_tag = next(
            (tag for tag, tag_name in ExifTags.TAGS.items() if tag_name == 'Orientation'),
            None)
        orientation = dict(exif.items()).get(orientation_tag)

        # Orientation values 3, 6 and 8 mean the stored pixels are rotated;
        # any other value (or no value) leaves the image untouched.
        if orientation == 3:
            img = img.rotate(180, expand=True)
        elif orientation == 6:
            img = img.rotate(270, expand=True)
        elif orientation == 8:
            img = img.rotate(90, expand=True)

    img = img.convert('RGB')
    return image.img_to_array(img)
class Faces(object):
    """Crop the faces found in a set of image files and paste cloaks back.

    For every path the image is loaded with ``load_image`` and passed to
    ``align``; each returned face crop is zero-padded to a square, resized to
    224x224 and finally run through ``preprocess(..., 'imagenet')``.

    Attributes set by ``__init__``:
        org_faces: list of the full images, one per input path.
        cropped_faces: array of the preprocessed 224x224 face crops.
        cropped_faces_shape: (height, width) of each crop before padding.
        cropped_index: second element returned by ``align`` for each crop;
            ``merge_faces`` uses it as ``(x0, y0, x1, y1)`` slice bounds.
        callback_idx: index into ``org_faces`` of the image each crop came from.
        cloaked_cropped_faces: initialised to ``None``.
        cloaked_faces: independent copies of ``org_faces``.
    """

    def __init__(self, image_paths, sess):
        """Load every image in ``image_paths`` and collect its face crops.

        Exits the process with status 1 when no face is found in any image.
        """
        self.aligner = aligner(sess)
        self.org_faces = []
        self.cropped_faces = []
        self.cropped_faces_shape = []
        self.cropped_index = []
        self.callback_idx = []

        for i, p in enumerate(image_paths):
            cur_img = load_image(p)
            self.org_faces.append(cur_img)

            # NOTE(review): assumes ``align`` always returns a (faces, boxes)
            # pair, even when it detects nothing - confirm against align_face.
            align_img = align(cur_img, self.aligner, margin=0.7)
            cur_faces = align_img[0]
            cur_index = align_img[1]
            cur_shapes = [f.shape[:-1] for f in cur_faces]

            cur_faces_square = []
            for img in cur_faces:
                # Pad the crop with zeros on the bottom/right so it is square
                # before resizing, which keeps the face's aspect ratio.
                long_size = max(img.shape[0], img.shape[1])
                base = np.zeros((long_size, long_size, 3))
                base[0:img.shape[0], 0:img.shape[1], :] = img
                cur_faces_square.append(resize(base, (224, 224)))

            self.cropped_faces_shape.extend(cur_shapes)
            self.cropped_faces.extend(cur_faces_square)
            self.cropped_index.extend(cur_index)
            self.callback_idx.extend([i] * len(cur_faces_square))

        if not self.cropped_faces:
            print("No faces detected")
            # ``sys.exit`` instead of the ``exit`` helper, which is only
            # installed by the ``site`` module and may be absent.
            sys.exit(1)

        self.cropped_faces = np.array(self.cropped_faces)
        self.cropped_faces = preprocess(self.cropped_faces, 'imagenet')

        self.cloaked_cropped_faces = None
        self.cloaked_faces = self._copy_images(self.org_faces)

    @staticmethod
    def _copy_images(images):
        """Return copies of ``images`` that share no pixel data with them.

        When all images have the same shape the result is the stacked array
        that ``np.copy`` on the list would produce. Otherwise a 1-D object
        array is returned whose elements are individually copied; a plain
        ``np.copy`` would fail on such input or copy only references, so
        in-place edits of the copy would leak into the originals.
        """
        if len({img.shape for img in images}) <= 1:
            return np.array(images)
        copies = np.empty(len(images), dtype=object)
        for idx, img in enumerate(images):
            copies[idx] = np.copy(img)
        return copies

    def get_faces(self):
        """Return the array of preprocessed face crops."""
        return self.cropped_faces

    def merge_faces(self, cloaks):
        """Add each cloak perturbation onto a fresh copy of its source image.

        Args:
            cloaks: sequence with one 3-D perturbation per entry of
                ``cropped_faces``, indexed in the same order.

        Returns:
            ``self.cloaked_faces``: copies of ``org_faces`` with every cloak
            added in place over its face region. ``org_faces`` is not modified.
        """
        self.cloaked_faces = self._copy_images(self.org_faces)

        for i, org_shape in enumerate(self.cropped_faces_shape):
            # Undo the crop pipeline: scale back to the padded square, then
            # trim the padding to recover the crop's original height/width.
            old_square_shape = max(org_shape[0], org_shape[1])
            reshape_cloak = resize(cloaks[i], (old_square_shape, old_square_shape))
            reshape_cloak = reshape_cloak[0:org_shape[0], 0:org_shape[1], :]

            callback_id = self.callback_idx[i]
            bb = self.cropped_index[i]
            self.cloaked_faces[callback_id][bb[1]:bb[3], bb[0]:bb[2], :] += reshape_cloak

        return self.cloaked_faces
def dump_dictionary_as_json(dict, outfile):
j = json.dumps(dict)
with open(outfile, "wb") as f:
@ -30,10 +126,12 @@ def dump_dictionary_as_json(dict, outfile):
def fix_gpu_memory(mem_fraction=1):
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_fraction)
tf_config = tf.ConfigProto(gpu_options=gpu_options)
tf_config.gpu_options.allow_growth = True
tf_config.log_device_placement = False
tf_config = None
if tf.test.is_gpu_available():
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_fraction)
tf_config = tf.ConfigProto(gpu_options=gpu_options)
tf_config.gpu_options.allow_growth = True
tf_config.log_device_placement = False
init_op = tf.global_variables_initializer()
sess = tf.Session(config=tf_config)
sess.run(init_op)
@ -45,7 +143,6 @@ def load_victim_model(number_classes, teacher_model=None, end2end=False):
for l in teacher_model.layers:
l.trainable = end2end
x = teacher_model.layers[-1].output
x = Dense(number_classes)(x)
x = Activation('softmax', name="act")(x)
model = Model(teacher_model.input, x)
@ -141,6 +238,7 @@ def imagenet_preprocessing(x, data_format=None):
return x
def imagenet_reverse_preprocessing(x, data_format=None):
import keras.backend as K
x = np.array(x)
@ -185,7 +283,20 @@ def build_bottleneck_model(model, cut_off):
def load_extractor(name):
model = keras.models.load_model("../feature_extractors/{}.h5".format(name))
model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
os.makedirs(model_dir, exist_ok=True)
model_file = os.path.join(model_dir, "{}.h5".format(name))
if os.path.exists(model_file):
model = keras.models.load_model(model_file)
else:
get_file("{}.h5".format(name), "http://sandlab.cs.uchicago.edu/fawkes/files/{}.h5".format(name),
cache_dir=model_dir, cache_subdir='')
get_file("{}_emb.p.gz".format(name), "http://sandlab.cs.uchicago.edu/fawkes/files/{}_emb.p.gz".format(name),
cache_dir=model_dir, cache_subdir='')
model = keras.models.load_model(model_file)
if hasattr(model.layers[-1], "activation") and model.layers[-1].activation == "softmax":
raise Exception(
"Given extractor's last layer is softmax, need to remove the top layers to make it into a feature extractor")
@ -200,10 +311,11 @@ def load_extractor(name):
def get_dataset_path(dataset):
if not os.path.exists("config.json"):
model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
if not os.path.exists(os.path.join(model_dir, "config.json")):
raise Exception("Please config the datasets before running protection code. See more in README and config.py.")
config = json.load(open("config.json", 'r'))
config = json.load(open(os.path.join(model_dir, "config.json"), 'r'))
if dataset not in config:
raise Exception(
"Dataset {} does not exist, please download to data/ and add the path to this function... Abort".format(
@ -217,7 +329,8 @@ def normalize(x):
def dump_image(x, filename, format="png", scale=False):
img = image.array_to_img(x, scale=scale)
# img = image.array_to_img(x, scale=scale)
img = image.array_to_img(x)
img.save(filename, format)
return
@ -231,13 +344,17 @@ def load_dir(path):
im = image.img_to_array(im)
x_ls.append(im)
raw_x = np.array(x_ls)
return preprocess_input(raw_x)
return preprocess(raw_x, 'imagenet')
def load_embeddings(feature_extractors_names):
model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
dictionaries = []
for extractor_name in feature_extractors_names:
path2emb = pickle.load(open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb"))
fp = gzip.open(os.path.join(model_dir, "{}_emb.p.gz".format(extractor_name)), 'rb')
path2emb = pickle.load(fp)
fp.close()
dictionaries.append(path2emb)
merge_dict = {}
@ -272,6 +389,8 @@ def calculate_dist_score(a, b, feature_extractors_ls, metric='l2'):
def select_target_label(imgs, feature_extractors_ls, feature_extractors_names, metric='l2'):
model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
original_feature_x = extractor_ls_predict(feature_extractors_ls, imgs)
path2emb = load_embeddings(feature_extractors_names)
@ -282,178 +401,174 @@ def select_target_label(imgs, feature_extractors_ls, feature_extractors_names, m
pair_dist = pairwise_distances(original_feature_x, embs, metric)
max_sum = np.min(pair_dist, axis=0)
sorted_idx = np.argsort(max_sum)[::-1]
max_id = np.argmax(max_sum)
highest_num = 0
paired_target_X = None
final_target_class_path = None
for idx in sorted_idx[:1]:
target_class_path = paths[idx]
cur_target_X = load_dir(target_class_path)
cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X])
cur_tot_sum, cur_paired_target_X = calculate_dist_score(imgs, cur_target_X,
feature_extractors_ls,
metric=metric)
if cur_tot_sum > highest_num:
highest_num = cur_tot_sum
paired_target_X = cur_paired_target_X
target_data_id = paths[int(max_id)]
image_dir = os.path.join(model_dir, "target_data/{}/*".format(target_data_id))
if not os.path.exists(image_dir):
get_file("{}.h5".format(name), "http://sandlab.cs.uchicago.edu/fawkes/files/target_images".format(name),
cache_dir=model_dir, cache_subdir='')
np.random.shuffle(paired_target_X)
paired_target_X = list(paired_target_X)
while len(paired_target_X) < len(imgs):
paired_target_X += paired_target_X
image_paths = glob.glob(image_dir)
paired_target_X = paired_target_X[:len(imgs)]
return np.array(paired_target_X)
target_images = [image.img_to_array(image.load_img(cur_path)) for cur_path in
image_paths]
target_images = np.array([resize(x, (224, 224)) for x in target_images])
target_images = preprocess(target_images, 'imagenet')
target_images = list(target_images)
while len(target_images) < len(imgs):
target_images += target_images
class CloakData(object):
def __init__(self, protect_directory=None, img_shape=(224, 224)):
target_images = random.sample(target_images, len(imgs))
return np.array(target_images)
self.img_shape = img_shape
# self.train_data_dir, self.test_data_dir, self.number_classes, self.number_samples = get_dataset_path(dataset)
# self.all_labels = sorted(list(os.listdir(self.train_data_dir)))
self.protect_directory = protect_directory
self.protect_X = self.load_label_data(self.protect_directory)
self.cloaked_protect_train_X = None
self.label2path_train, self.label2path_test, self.path2idx = self.build_data_mapping()
self.all_training_path = self.get_all_data_path(self.label2path_train)
self.all_test_path = self.get_all_data_path(self.label2path_test)
self.protect_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.protect_class))
def get_class_image_files(self, path):
return [os.path.join(path, f) for f in os.listdir(path)]
def extractor_ls_predict(self, feature_extractors_ls, X):
feature_ls = []
for extractor in feature_extractors_ls:
cur_features = extractor.predict(X)
feature_ls.append(cur_features)
concated_feature_ls = np.concatenate(feature_ls, axis=1)
concated_feature_ls = normalize(concated_feature_ls)
return concated_feature_ls
def load_embeddings(self, feature_extractors_names):
dictionaries = []
for extractor_name in feature_extractors_names:
path2emb = pickle.load(open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb"))
dictionaries.append(path2emb)
merge_dict = {}
for k in dictionaries[0].keys():
cur_emb = [dic[k] for dic in dictionaries]
merge_dict[k] = np.concatenate(cur_emb)
return merge_dict
def select_target_label(self, feature_extractors_ls, feature_extractors_names, metric='l2'):
original_feature_x = self.extractor_ls_predict(feature_extractors_ls, self.protect_train_X)
path2emb = self.load_embeddings(feature_extractors_names)
items = list(path2emb.items())
paths = [p[0] for p in items]
embs = [p[1] for p in items]
embs = np.array(embs)
pair_dist = pairwise_distances(original_feature_x, embs, metric)
max_sum = np.min(pair_dist, axis=0)
sorted_idx = np.argsort(max_sum)[::-1]
highest_num = 0
paired_target_X = None
final_target_class_path = None
for idx in sorted_idx[:5]:
target_class_path = paths[idx]
cur_target_X = self.load_dir(target_class_path)
cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X])
cur_tot_sum, cur_paired_target_X = self.calculate_dist_score(self.protect_train_X, cur_target_X,
feature_extractors_ls,
metric=metric)
if cur_tot_sum > highest_num:
highest_num = cur_tot_sum
paired_target_X = cur_paired_target_X
final_target_class_path = target_class_path
np.random.shuffle(paired_target_X)
return final_target_class_path, paired_target_X
def calculate_dist_score(self, a, b, feature_extractors_ls, metric='l2'):
features1 = self.extractor_ls_predict(feature_extractors_ls, a)
features2 = self.extractor_ls_predict(feature_extractors_ls, b)
pair_cos = pairwise_distances(features1, features2, metric)
max_sum = np.min(pair_cos, axis=0)
max_sum_arg = np.argsort(max_sum)[::-1]
max_sum_arg = max_sum_arg[:len(a)]
max_sum = [max_sum[i] for i in max_sum_arg]
paired_target_X = [b[j] for j in max_sum_arg]
paired_target_X = np.array(paired_target_X)
return np.min(max_sum), paired_target_X
def get_all_data_path(self, label2path):
all_paths = []
for k, v in label2path.items():
cur_all_paths = [os.path.join(k, cur_p) for cur_p in v]
all_paths.extend(cur_all_paths)
return all_paths
def load_label_data(self, label):
train_label_path = os.path.join(self.train_data_dir, label)
test_label_path = os.path.join(self.test_data_dir, label)
train_X = self.load_dir(train_label_path)
test_X = self.load_dir(test_label_path)
return train_X, test_X
def load_dir(self, path):
assert os.path.exists(path)
x_ls = []
for file in os.listdir(path):
cur_path = os.path.join(path, file)
im = image.load_img(cur_path, target_size=self.img_shape)
im = image.img_to_array(im)
x_ls.append(im)
raw_x = np.array(x_ls)
return preprocess_input(raw_x)
def build_data_mapping(self):
label2path_train = {}
label2path_test = {}
idx = 0
path2idx = {}
for label_name in self.all_labels:
full_path_train = os.path.join(self.train_data_dir, label_name)
full_path_test = os.path.join(self.test_data_dir, label_name)
label2path_train[full_path_train] = list(os.listdir(full_path_train))
label2path_test[full_path_test] = list(os.listdir(full_path_test))
for img_file in os.listdir(full_path_train):
path2idx[os.path.join(full_path_train, img_file)] = idx
for img_file in os.listdir(full_path_test):
path2idx[os.path.join(full_path_test, img_file)] = idx
idx += 1
return label2path_train, label2path_test, path2idx
def generate_data_post_cloak(self, sybil=False):
    """Endlessly yield training batches with cloaked images swapped in.

    Every batch is built from 32 paths sampled from
    ``self.all_training_path``. For each path:

    * if it is in ``self.protect_class_path``, a random entry of
      ``self.cloaked_protect_train_X`` is used instead of the file;
    * else, if ``sybil`` is true and the path is in ``self.sybil_class``, a
      random entry of ``self.cloaked_sybil_train_X`` is used;
    * otherwise the file is loaded at ``self.img_shape`` and preprocessed.

    :param sybil: enable the sybil substitution branch.
    :return: generator of ``(batch_X, batch_Y)``; ``batch_Y`` holds the
        ``self.path2idx`` labels encoded by ``to_categorical`` with
        ``self.number_classes`` classes.
    :raises AssertionError: on first iteration, if no cloaked images are set.
    """
    assert self.cloaked_protect_train_X is not None
    while True:
        samples, labels = [], []
        for path in random.sample(self.all_training_path, 32):
            label_idx = self.path2idx[path]
            if path in self.protect_class_path:
                sample = random.choice(self.cloaked_protect_train_X)
            elif sybil and (path in self.sybil_class):
                sample = random.choice(self.cloaked_sybil_train_X)
            else:
                # Untouched class: read the original file from disk.
                raw = image.img_to_array(
                    image.load_img(path, target_size=self.img_shape)
                )
                sample = preprocess_input(raw)
            samples.append(sample)
            labels.append(label_idx)
        stacked = np.array(samples)
        one_hot = to_categorical(np.array(labels), num_classes=self.number_classes)
        yield stacked, one_hot
# class CloakData(object):
# def __init__(self, protect_directory=None, img_shape=(224, 224)):
#
# self.img_shape = img_shape
# # self.train_data_dir, self.test_data_dir, self.number_classes, self.number_samples = get_dataset_path(dataset)
# # self.all_labels = sorted(list(os.listdir(self.train_data_dir)))
# self.protect_directory = protect_directory
#
# self.protect_X = self.load_label_data(self.protect_directory)
#
# self.cloaked_protect_train_X = None
#
# self.label2path_train, self.label2path_test, self.path2idx = self.build_data_mapping()
# self.all_training_path = self.get_all_data_path(self.label2path_train)
# self.all_test_path = self.get_all_data_path(self.label2path_test)
# self.protect_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.protect_class))
#
# def get_class_image_files(self, path):
# return [os.path.join(path, f) for f in os.listdir(path)]
#
# def extractor_ls_predict(self, feature_extractors_ls, X):
# feature_ls = []
# for extractor in feature_extractors_ls:
# cur_features = extractor.predict(X)
# feature_ls.append(cur_features)
# concated_feature_ls = np.concatenate(feature_ls, axis=1)
# concated_feature_ls = normalize(concated_feature_ls)
# return concated_feature_ls
#
# def load_embeddings(self, feature_extractors_names):
# dictionaries = []
# for extractor_name in feature_extractors_names:
# path2emb = pickle.load(open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb"))
# dictionaries.append(path2emb)
#
# merge_dict = {}
# for k in dictionaries[0].keys():
# cur_emb = [dic[k] for dic in dictionaries]
# merge_dict[k] = np.concatenate(cur_emb)
# return merge_dict
#
# def select_target_label(self, feature_extractors_ls, feature_extractors_names, metric='l2'):
# original_feature_x = self.extractor_ls_predict(feature_extractors_ls, self.protect_train_X)
#
# path2emb = self.load_embeddings(feature_extractors_names)
# items = list(path2emb.items())
# paths = [p[0] for p in items]
# embs = [p[1] for p in items]
# embs = np.array(embs)
#
# pair_dist = pairwise_distances(original_feature_x, embs, metric)
# max_sum = np.min(pair_dist, axis=0)
# sorted_idx = np.argsort(max_sum)[::-1]
#
# highest_num = 0
# paired_target_X = None
# final_target_class_path = None
# for idx in sorted_idx[:5]:
# target_class_path = paths[idx]
# cur_target_X = self.load_dir(target_class_path)
# cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X])
# cur_tot_sum, cur_paired_target_X = self.calculate_dist_score(self.protect_train_X, cur_target_X,
# feature_extractors_ls,
# metric=metric)
# if cur_tot_sum > highest_num:
# highest_num = cur_tot_sum
# paired_target_X = cur_paired_target_X
# final_target_class_path = target_class_path
#
# np.random.shuffle(paired_target_X)
# return final_target_class_path, paired_target_X
#
# def calculate_dist_score(self, a, b, feature_extractors_ls, metric='l2'):
# features1 = self.extractor_ls_predict(feature_extractors_ls, a)
# features2 = self.extractor_ls_predict(feature_extractors_ls, b)
#
# pair_cos = pairwise_distances(features1, features2, metric)
# max_sum = np.min(pair_cos, axis=0)
# max_sum_arg = np.argsort(max_sum)[::-1]
# max_sum_arg = max_sum_arg[:len(a)]
# max_sum = [max_sum[i] for i in max_sum_arg]
# paired_target_X = [b[j] for j in max_sum_arg]
# paired_target_X = np.array(paired_target_X)
# return np.min(max_sum), paired_target_X
#
# def get_all_data_path(self, label2path):
# all_paths = []
# for k, v in label2path.items():
# cur_all_paths = [os.path.join(k, cur_p) for cur_p in v]
# all_paths.extend(cur_all_paths)
# return all_paths
#
# def load_label_data(self, label):
# train_label_path = os.path.join(self.train_data_dir, label)
# test_label_path = os.path.join(self.test_data_dir, label)
# train_X = self.load_dir(train_label_path)
# test_X = self.load_dir(test_label_path)
# return train_X, test_X
#
# def load_dir(self, path):
# assert os.path.exists(path)
# x_ls = []
# for file in os.listdir(path):
# cur_path = os.path.join(path, file)
# im = image.load_img(cur_path, target_size=self.img_shape)
# im = image.img_to_array(im)
# x_ls.append(im)
# raw_x = np.array(x_ls)
# return preprocess_input(raw_x)
#
# def build_data_mapping(self):
# label2path_train = {}
# label2path_test = {}
# idx = 0
# path2idx = {}
# for label_name in self.all_labels:
# full_path_train = os.path.join(self.train_data_dir, label_name)
# full_path_test = os.path.join(self.test_data_dir, label_name)
# label2path_train[full_path_train] = list(os.listdir(full_path_train))
# label2path_test[full_path_test] = list(os.listdir(full_path_test))
# for img_file in os.listdir(full_path_train):
# path2idx[os.path.join(full_path_train, img_file)] = idx
# for img_file in os.listdir(full_path_test):
# path2idx[os.path.join(full_path_test, img_file)] = idx
# idx += 1
# return label2path_train, label2path_test, path2idx
#
# def generate_data_post_cloak(self, sybil=False):
# assert self.cloaked_protect_train_X is not None
# while True:
# batch_X = []
# batch_Y = []
# cur_batch_path = random.sample(self.all_training_path, 32)
# for p in cur_batch_path:
# cur_y = self.path2idx[p]
# if p in self.protect_class_path:
# cur_x = random.choice(self.cloaked_protect_train_X)
# elif sybil and (p in self.sybil_class):
# cur_x = random.choice(self.cloaked_sybil_train_X)
# else:
# im = image.load_img(p, target_size=self.img_shape)
# im = image.img_to_array(im)
# cur_x = preprocess_input(im)
# batch_X.append(cur_x)
# batch_Y.append(cur_y)
# batch_X = np.array(batch_X)
# batch_Y = to_categorical(np.array(batch_Y), num_classes=self.number_classes)
# yield batch_X, batch_Y

118
setup.py
View File

@ -1,23 +1,117 @@
import setuptools
import os
import re
import sys
from setuptools import setup, Command
# Absolute path of the directory that contains this setup.py.
__PATH__ = os.path.abspath(os.path.dirname(__file__))

# Resolve README.md against __PATH__ (not the current working directory) and
# decode it as UTF-8 so the result does not depend on where or under which
# locale the script is invoked.
with open(os.path.join(__PATH__, "README.md"), "r", encoding="utf-8") as fh:
    long_description = fh.read()
setuptools.setup(
name="fawkes",
version="0.0.1",
author="Shawn Shan",
author_email="shansixiong@cs.uchicago.edu",
description="Fawkes protect user privacy",
def read_version():
    """Return the version string declared in ``fawkes/__init__.py``.

    The file is located relative to this script and searched for a line of
    the form ``__version__ = '<value>'`` (single or double quotes).

    :return: the text between the quotes.
    :raises RuntimeError: if no such line is present.
    :raises OSError: if the file cannot be opened.
    """
    # Local name on purpose: avoids shadowing the module-level __PATH__.
    setup_dir = os.path.abspath(os.path.dirname(__file__))
    init_path = os.path.join(setup_dir, 'fawkes', '__init__.py')
    # Python source is UTF-8 by default; do not rely on the locale encoding.
    with open(init_path, encoding='utf-8') as f:
        version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
                                  f.read(), re.M)
    if version_match:
        return version_match.group(1)
    raise RuntimeError("Unable to find __version__ string")
# Release number parsed from fawkes/__init__.py; used by setup(version=...)
# and by DeployCommand for the 'dev' guard and the git tag name.
__version__ = read_version()
# brought from https://github.com/kennethreitz/setup.py
class DeployCommand(Command):
    """``setup.py deploy``: build an sdist, upload it with Twine, tag the release.

    The command refuses to run for versions containing ``dev`` or when
    ``git status --short`` reports changes. It always terminates the process
    through ``sys.exit``: status 0 after tagging, non-zero on any failure.
    """

    description = 'Build and deploy the package to PyPI.'
    user_options = []

    def initialize_options(self):
        """Nothing to initialise: the command takes no options."""
        pass

    def finalize_options(self):
        """Nothing to finalise: the command takes no options."""
        pass

    @staticmethod
    def status(s):
        """Print a progress message."""
        print(s)

    @staticmethod
    def _run_or_exit(argv):
        """Run ``argv`` without a shell; exit non-zero if it fails or cannot start."""
        import subprocess

        try:
            ret = subprocess.call(argv)
        except OSError as err:
            sys.exit("Error: could not run {0}: {1}".format(argv[0], err))
        if ret != 0:
            # Keep the child's code when it is a valid exit status, else use 1
            # (e.g. negative values for signals), so failure never maps to 0.
            sys.exit(ret if 0 < ret < 256 else 1)

    def run(self):
        """Execute the release steps in order, stopping at the first failure."""
        import glob
        from shutil import rmtree

        if 'dev' in __version__:
            # Raised explicitly instead of using `assert`, which is removed
            # under `python -O` and would let a dev version be published.
            raise AssertionError(
                "Only non-devel versions are allowed. "
                "__version__ == {}".format(__version__))

        with os.popen("git status --short") as fp:
            git_status = fp.read().strip()
        if git_status:
            print("Error: git repository is not clean.\n")
            os.system("git status --short")
            sys.exit(1)

        self.status('Removing previous builds ...')
        try:
            rmtree(os.path.join(__PATH__, 'dist'))
        except OSError:
            # No previous build directory to remove; continue.
            pass

        # NOTE: only an sdist is built, whatever the message below says.
        self.status('Building Source and Wheel (universal) distribution ...')
        # Argument list instead of a shell string: safe for interpreter paths
        # containing spaces, and a failed build now aborts the deploy.
        self._run_or_exit([sys.executable, 'setup.py', 'sdist'])

        self.status('Uploading the package to PyPI via Twine ...')
        # No shell is involved, so expand dist/* ourselves.
        self._run_or_exit(['twine', 'upload'] + glob.glob(os.path.join('dist', '*')))

        self.status('Creating git tags ...')
        os.system('git tag v{0}'.format(__version__))
        os.system('git tag --list')
        sys.exit()
# Passed to setup(setup_requires=...); currently empty.
setup_requires = []
# Passed to setup(install_requires=...): packages installed with fawkes.
install_requires = [
    'numpy>=1.16.4',
    'tensorflow>=1.13.1',
    # NOTE(review): argparse is in the Python 3 standard library and
    # python_requires is '>=3.5' -- this entry looks unnecessary; confirm.
    'argparse',
    # keras is the only dependency pinned to an exact version.
    'keras==2.2.5',
    'scikit-image',
    'pillow>=7.0.0',
    'opencv-python>=4.2.0.34',
]
# Package metadata and build configuration. Each keyword appears exactly once:
# a repeated keyword argument in a call is a SyntaxError.
setup(
    name='fawkes',
    version=__version__,
    license='MIT',
    description='An utility to protect user privacy',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url="https://github.com/Shawn-Shan/fawkes",
    author='Shawn Shan',
    author_email='shansixiong@cs.uchicago.edu',
    keywords='fawkes privacy clearview',
    classifiers=[
        'Development Status :: 3 - Alpha',
        'License :: OSI Approved :: MIT License',
        "Operating System :: OS Independent",
        'Programming Language :: Python :: 3',
        'Topic :: System :: Monitoring',
    ],
    packages=['fawkes'],
    install_requires=install_requires,
    setup_requires=setup_requires,
    entry_points={
        # Exposes a `fawkes` console command bound to fawkes.main.
        'console_scripts': ['fawkes=fawkes:main'],
    },
    cmdclass={
        # Enables `python setup.py deploy`.
        'deploy': DeployCommand,
    },
    include_package_data=True,
    zip_safe=False,
    python_requires='>=3.5',
)