add files

Former-commit-id: 8999421c9e34ca25384a3adcb879f4f5afdedda4 [formerly 0b6e5924e22331500a9279183550e81146029249] Former-commit-id: 8bafbec9710aa552ecd5dbf010089eee435ae9c9
2026-06-12 21:50:46 +05:30 · 2020-07-02 12:32:46 -05:00
parent d7a25eb292
commit b1e7b67055
5 changed files with 600 additions and 300 deletions
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+# @Date    : 2020-07-01
+# @Author  : Shawn Shan (shansixiong@cs.uchicago.edu)
+# @Link    : https://www.shawnshan.com/
+
+
+__version__ = '0.0.2'
+
+from .differentiator import FawkesMaskGeneration
+from .utils import load_extractor, init_gpu, select_target_label, dump_image, reverse_process_cloaked, \
+    Faces
+from .protection import main
+import logging
+import sys
+import os
+# Disable the 'tensorflow' Python logger as soon as this module is imported.
+logging.getLogger('tensorflow').disabled = True
+
+
+# Names this module declares as its public API (what `from <module> import *` exposes).
+__all__ = (
+    '__version__',
+    'FawkesMaskGeneration', 'load_extractor',
+    'init_gpu',
+    'select_target_label', 'dump_image', 'reverse_process_cloaked', 'Faces', 'main'
+)
@@ -10,7 +10,7 @@ from decimal import Decimal

 import numpy as np
 import tensorflow as tf
-from utils import preprocess, reverse_preprocess
+from .utils import preprocess, reverse_preprocess


 class FawkesMaskGeneration:
@@ -47,7 +47,7 @@ class FawkesMaskGeneration:
                 max_iterations=MAX_ITERATIONS, initial_const=INITIAL_CONST,
                 intensity_range=INTENSITY_RANGE, l_threshold=L_THRESHOLD,
                 max_val=MAX_VAL, keep_final=KEEP_FINAL, maximize=MAXIMIZE, image_shape=IMAGE_SHAPE,
-                 verbose=0, ratio=RATIO, limit_dist=LIMIT_DIST):
+                 verbose=0, ratio=RATIO, limit_dist=LIMIT_DIST, faces=None):

        assert intensity_range in {'raw', 'imagenet', 'inception', 'mnist'}

@@ -69,10 +69,12 @@ class FawkesMaskGeneration:
        self.ratio = ratio
        self.limit_dist = limit_dist
        self.single_shape = list(image_shape)
+        self.faces = faces

        self.input_shape = tuple([self.batch_size] + self.single_shape)

        self.bottleneck_shape = tuple([self.batch_size] + self.single_shape)
+        # self.bottleneck_shape = tuple([self.batch_size, bottleneck_model_ls[0].output_shape[-1]])

        # the variable we're going to optimize over
        self.modifier = tf.Variable(np.zeros(self.input_shape, dtype=np.float32))
@@ -149,8 +151,6 @@ class FawkesMaskGeneration:
                     self.dist_raw,
                     tf.zeros_like(self.dist_raw)))
        self.dist_sum = tf.reduce_sum(tf.where(self.mask, self.dist, tf.zeros_like(self.dist)))
-        # self.dist_sum = 1e-5 * tf.reduce_sum(self.dist)
-        # self.dist_raw_sum = self.dist_sum

        def resize_tensor(input_tensor, model_input_shape):
            if input_tensor.shape[1:] == model_input_shape or model_input_shape[1] is None:
@@ -171,16 +171,14 @@ class FawkesMaskGeneration:

            self.bottleneck_a = bottleneck_model(cur_aimg_input)
            if self.MIMIC_IMG:
-                # cur_timg_input = resize_tensor(self.timg_input, model_input_shape)
-                # cur_simg_input = resize_tensor(self.simg_input, model_input_shape)
                cur_timg_input = self.timg_input
                cur_simg_input = self.simg_input
                self.bottleneck_t = calculate_direction(bottleneck_model, cur_timg_input, cur_simg_input)
-                # self.bottleneck_t = bottleneck_model(cur_timg_input)
            else:
                self.bottleneck_t = self.bottleneck_t_raw

            bottleneck_diff = self.bottleneck_t - self.bottleneck_a
+
            scale_factor = tf.sqrt(tf.reduce_sum(tf.square(self.bottleneck_t), axis=1))

            cur_bottlesim = tf.sqrt(tf.reduce_sum(tf.square(bottleneck_diff), axis=1))
@@ -189,7 +187,6 @@ class FawkesMaskGeneration:

            self.bottlesim += cur_bottlesim

-            # self.bottlesim_push += cur_bottlesim_push_sum
            self.bottlesim_sum += cur_bottlesim_sum

        # sum up the losses
@@ -202,20 +199,13 @@ class FawkesMaskGeneration:
                                               self.loss,
                                               tf.zeros_like(self.loss)))

-        # self.loss_sum = self.dist_sum + tf.reduce_sum(self.bottlesim)
-        # import pdb
-        # pdb.set_trace()
-        # self.loss_sum = tf.reduce_sum(tf.where(self.mask, self.loss, tf.zeros_like(self.loss)))
-
-        # Setup the Adadelta optimizer and keep track of variables
-        # we're creating
        start_vars = set(x.name for x in tf.global_variables())
        self.learning_rate_holder = tf.placeholder(tf.float32, shape=[])
+
        optimizer = tf.train.AdadeltaOptimizer(self.learning_rate_holder)
        # optimizer = tf.train.AdamOptimizer(self.learning_rate_holder)

-        self.train = optimizer.minimize(self.loss_sum,
-                                        var_list=[self.modifier])
+        self.train = optimizer.minimize(self.loss_sum, var_list=[self.modifier])
        end_vars = tf.global_variables()
        new_vars = [x for x in end_vars if x.name not in start_vars]

@@ -297,6 +287,7 @@ class FawkesMaskGeneration:
        LR = self.learning_rate
        nb_imgs = source_imgs.shape[0]
        mask = [True] * nb_imgs + [False] * (self.batch_size - nb_imgs)
+        # mask = [True] * self.batch_size
        mask = np.array(mask, dtype=np.bool)

        source_imgs = np.array(source_imgs)
@@ -317,12 +308,17 @@ class FawkesMaskGeneration:
            timg_tanh_batch = np.zeros(self.input_shape)
        else:
            timg_tanh_batch = np.zeros(self.bottleneck_shape)
+
        weights_batch = np.zeros(self.bottleneck_shape)
        simg_tanh_batch[:nb_imgs] = simg_tanh[:nb_imgs]
        timg_tanh_batch[:nb_imgs] = timg_tanh[:nb_imgs]
        weights_batch[:nb_imgs] = weights[:nb_imgs]
        modifier_batch = np.ones(self.input_shape) * 1e-6

+        temp_images = []
+
+        # set the variables so that we don't have to send them over again
+        if self.MIMIC_IMG:
            self.sess.run(self.setup,
                          {self.assign_timg_tanh: timg_tanh_batch,
                           self.assign_simg_tanh: simg_tanh_batch,
@@ -330,6 +326,16 @@ class FawkesMaskGeneration:
                           self.assign_mask: mask,
                           self.assign_weights: weights_batch,
                           self.assign_modifier: modifier_batch})
+        else:
+            # if directly mimicking a vector, use assign_bottleneck_t_raw
+            # in setup
+            self.sess.run(self.setup,
+                          {self.assign_bottleneck_t_raw: timg_tanh_batch,
+                           self.assign_simg_tanh: simg_tanh_batch,
+                           self.assign_const: CONST,
+                           self.assign_mask: mask,
+                           self.assign_weights: weights_batch,
+                           self.assign_modifier: modifier_batch})

        best_bottlesim = [0] * nb_imgs if self.maximize else [np.inf] * nb_imgs
        best_adv = np.zeros_like(source_imgs)
@@ -347,6 +353,7 @@ class FawkesMaskGeneration:
                     dist_raw_sum,
                     bottlesim_sum / nb_imgs))

+        finished_idx = set()
        try:
            total_distance = [0] * nb_imgs

@@ -369,8 +376,14 @@ class FawkesMaskGeneration:
                    [self.dist_raw,
                     self.bottlesim,
                     self.aimg_input])
+
+                all_clear = True
                for e, (dist_raw, bottlesim, aimg_input) in enumerate(
                        zip(dist_raw_list, bottlesim_list, aimg_input_list)):
+
+                    if e in finished_idx:
+                        continue
+
                    if e >= nb_imgs:
                        break
                    if (bottlesim < best_bottlesim[e] and bottlesim > total_distance[e] * 0.1 and (
@@ -379,40 +392,55 @@ class FawkesMaskGeneration:
                        best_bottlesim[e] = bottlesim
                        best_adv[e] = aimg_input

-                if iteration != 0 and iteration % (self.MAX_ITERATIONS // 3) == 0:
-                    # LR = LR / 2
+                    # if iteration > 20 and (dist_raw >= self.l_threshold or iteration == self.MAX_ITERATIONS - 1):
+                    #     finished_idx.add(e)
+                    #     print("{} finished at dist {}".format(e, dist_raw))
+                    #     best_bottlesim[e] = bottlesim
+                    #     best_adv[e] = aimg_input
+                    #
+                    all_clear = False
+
+                if all_clear:
+                    break
+
+                if iteration != 0 and iteration % (self.MAX_ITERATIONS // 2) == 0:
+                    LR = LR / 2
                    print("Learning Rate: ", LR)

-                if iteration % (self.MAX_ITERATIONS // 10) == 0:
+                if iteration % (self.MAX_ITERATIONS // 5) == 0:
                    if self.verbose == 1:
-                        loss_sum = float(self.sess.run(self.loss_sum))
-                        dist_sum = float(self.sess.run(self.dist_sum))
-                        thresh_over = (dist_sum /
-                                       self.batch_size /
-                                       self.l_threshold *
-                                       100)
                        dist_raw_sum = float(self.sess.run(self.dist_raw_sum))
                        bottlesim_sum = self.sess.run(self.bottlesim_sum)
-                        print('ITER %4d: Total loss: %.4E; perturb: %.6f (%.2f%% over, raw: %.6f); sim: %f'
-                              % (iteration,
-                                 Decimal(loss_sum),
-                                 dist_sum,
-                                 thresh_over,
-                                 dist_raw_sum,
-                                 bottlesim_sum / nb_imgs))
+                        print('ITER %4d perturb: %.5f; sim: %f'
+                              % (iteration, dist_raw_sum / nb_imgs, bottlesim_sum / nb_imgs))
+
+                        # protected_images = aimg_input_list
+                        #
+                        # orginal_images = np.copy(self.faces.cropped_faces)
+                        # cloak_perturbation = reverse_process_cloaked(protected_images) - reverse_process_cloaked(
+                        #     orginal_images)
+                        # final_images = self.faces.merge_faces(cloak_perturbation)
+                        #
+                        # for p_img, img in zip(protected_images, final_images):
+                        #     dump_image(reverse_process_cloaked(p_img),
+                        #                "/home/shansixioing/fawkes/data/emily/emily_cloaked_cropped{}.png".format(iteration),
+                        #                format='png')
+                        #
+                        #     dump_image(img,
+                        #                "/home/shansixioing/fawkes/data/emily/emily_cloaked_{}.png".format(iteration),
+                        #                format='png')
+
        except KeyboardInterrupt:
            pass

        if self.verbose == 1:
            loss_sum = float(self.sess.run(self.loss_sum))
            dist_sum = float(self.sess.run(self.dist_sum))
-            thresh_over = (dist_sum / self.batch_size / self.l_threshold * 100)
            dist_raw_sum = float(self.sess.run(self.dist_raw_sum))
            bottlesim_sum = float(self.sess.run(self.bottlesim_sum))
-            print('END:       Total loss: %.4E; perturb: %.6f (%.2f%% over, raw: %.6f); sim: %f'
+            print('END:       Total loss: %.4E; perturb: %.6f (raw: %.6f); sim: %f'
                  % (Decimal(loss_sum),
                     dist_sum,
-                     thresh_over,
                     dist_raw_sum,
                     bottlesim_sum / nb_imgs))

@@ -1,3 +1,7 @@
+# from __future__ import absolute_import
+# from __future__ import division
+# from __future__ import print_function
+
 import argparse
 import glob
 import os
@@ -5,106 +9,141 @@ import random
 import sys

 import numpy as np
-from differentiator import FawkesMaskGeneration
-from keras.applications.vgg16 import preprocess_input
-from keras.preprocessing import image
-from skimage.transform import resize
-from tensorflow import set_random_seed
-from utils import load_extractor, init_gpu, select_target_label, dump_image, reverse_process_cloaked
+
+from .differentiator import FawkesMaskGeneration
+from .utils import load_extractor, init_gpu, select_target_label, dump_image, reverse_process_cloaked, \
+    Faces

 random.seed(12243)
 np.random.seed(122412)
-set_random_seed(12242)

-BATCH_SIZE = 1
-MAX_ITER = 1000
+BATCH_SIZE = 32


-def generate_cloak_images(sess, feature_extractors, image_X, target_X=None, th=0.01):
+def generate_cloak_images(sess, feature_extractors, image_X, target_emb=None, th=0.01, faces=None, sd=1e9, lr=2,
+                          max_step=500):
    batch_size = BATCH_SIZE if len(image_X) > BATCH_SIZE else len(image_X)

    differentiator = FawkesMaskGeneration(sess, feature_extractors,
                                          batch_size=batch_size,
                                          mimic_img=True,
                                          intensity_range='imagenet',
-                                          initial_const=args.sd,
-                                          learning_rate=args.lr,
-                                          max_iterations=MAX_ITER,
+                                          initial_const=sd,
+                                          learning_rate=lr,
+                                          max_iterations=max_step,
                                          l_threshold=th,
-                                          verbose=1, maximize=False, keep_final=False, image_shape=image_X.shape[1:])
+                                          verbose=1, maximize=False, keep_final=False, image_shape=image_X.shape[1:],
+                                          faces=faces)

-    cloaked_image_X = differentiator.attack(image_X, target_X)
+    cloaked_image_X = differentiator.attack(image_X, target_emb)
    return cloaked_image_X


-def extract_faces(img):
-    #  foo
-    return preprocess_input(resize(img, (224, 224)))
+def check_imgs(imgs):
+    """Return ``imgs`` expressed on the 0-255 pixel scale.
+
+    Values lying entirely inside [0, 1] are multiplied by 255; values lying
+    entirely inside [0, 255] are returned unchanged.
+
+    :param imgs: numeric array of pixel values.
+    :return: the pixel values on the 0-255 scale.
+    :raises ValueError: if the values fit neither range (negative, above 255,
+        or not comparable, e.g. NaN).
+    """
+    lowest, highest = np.min(imgs), np.max(imgs)
+
+    # Written as a negated "in range" test so that NaN extremes are rejected too.
+    if not (lowest >= 0 and highest <= 255):
+        raise ValueError(
+            "Image values must lie within [0, 1] or [0, 255], got range [{}, {}]".format(lowest, highest))
+
+    if highest <= 1:
+        return imgs * 255.0
+    return imgs


-def fawkes():
-    assert os.path.exists(args.directory)
-    assert os.path.isdir(args.directory)
+def main(*argv):
+    if not argv:
+        argv = list(sys.argv)
+
+    # attach SIGPIPE handler to properly handle broken pipe
+    try:  # sigpipe not available under windows. just ignore in this case
+        import signal
+        signal.signal(signal.SIGPIPE, signal.SIG_DFL)
+    except Exception as e:
+        pass
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--directory', '-d', type=str,
+                        help='directory that contain images for cloaking', default='imgs/')
+
+    parser.add_argument('--gpu', type=str,
+                        help='GPU id', default='0')
+
+    parser.add_argument('--mode', type=str,
+                        help='cloak generation mode', default='high')
+    parser.add_argument('--feature-extractor', type=str,
+                        help="name of the feature extractor used for optimization",
+                        default="high_extract")
+
+    parser.add_argument('--th', type=float, default=0.01)
+    parser.add_argument('--max-step', type=int, default=500)
+    parser.add_argument('--sd', type=int, default=1e9)
+    parser.add_argument('--lr', type=float, default=2)
+
+    parser.add_argument('--separate_target', action='store_true')
+
+    parser.add_argument('--format', type=str,
+                        help="final image format",
+                        default="jpg")
+    args = parser.parse_args(argv[1:])
+
+    if args.mode == 'low':
+        args.feature_extractor = "high_extract"
+        args.th = 0.003
+    elif args.mode == 'mid':
+        args.feature_extractor = "high_extract"
+        args.th = 0.005
+    elif args.mode == 'high':
+        args.feature_extractor = "high_extract"
+        args.th = 0.007
+    elif args.mode == 'ultra':
+        args.feature_extractor = "high_extract"
+        args.th = 0.01
+    elif args.mode == 'custom':
+        pass
+    else:
+        raise Exception("mode must be one of 'low', 'mid', 'high', 'ultra', 'custom'")
+
+    assert args.format in ['png', 'jpg', 'jpeg']
+    if args.format == 'jpg':
+        args.format = 'jpeg'

    sess = init_gpu(args.gpu)
-
-    print("Loading {} for optimization".format(args.feature_extractor))
-
-    feature_extractors_ls = [load_extractor(args.feature_extractor)]
+    fs_names = [args.feature_extractor]
+    feature_extractors_ls = [load_extractor(name) for name in fs_names]

    image_paths = glob.glob(os.path.join(args.directory, "*"))
    image_paths = [path for path in image_paths if "_cloaked" not in path.split("/")[-1]]
+    if not image_paths:
+        print("No images in the directory")
+        exit(1)

-    orginal_images = [extract_faces(image.img_to_array(image.load_img(cur_path))) for cur_path in
-                      image_paths]
+    faces = Faces(image_paths, sess)

+    orginal_images = faces.cropped_faces
    orginal_images = np.array(orginal_images)

-    if args.seperate_target:
-        target_images = []
+    if args.separate_target:
+        target_embedding = []
        for org_img in orginal_images:
            org_img = org_img.reshape([1] + list(org_img.shape))
-            tar_img = select_target_label(org_img, feature_extractors_ls, [args.feature_extractor])
-            target_images.append(tar_img)
-        target_images = np.concatenate(target_images)
+            tar_emb = select_target_label(org_img, feature_extractors_ls, fs_names)
+            target_embedding.append(tar_emb)
+        target_embedding = np.concatenate(target_embedding)
    else:
-        target_images = select_target_label(orginal_images, feature_extractors_ls, [args.feature_extractor])
-
-    # file_name = args.directory.split("/")[-1]
-    # os.makedirs(args.result_directory, exist_ok=True)
-    # os.makedirs(os.path.join(args.result_directory, file_name), exist_ok=True)
+        target_embedding = select_target_label(orginal_images, feature_extractors_ls, fs_names)

    protected_images = generate_cloak_images(sess, feature_extractors_ls, orginal_images,
-                                             target_X=target_images, th=args.th)
+                                             target_emb=target_embedding, th=args.th, faces=faces, sd=args.sd,
+                                             lr=args.lr, max_step=args.max_step)

-    for p_img, path in zip(protected_images, image_paths):
-        p_img = reverse_process_cloaked(p_img)
-        file_name = "{}_cloaked.jpeg".format(".".join(path.split(".")[:-1]))
-        dump_image(p_img, file_name, format="JPEG")
+    faces.cloaked_cropped_faces = protected_images

+    cloak_perturbation = reverse_process_cloaked(protected_images) - reverse_process_cloaked(orginal_images)
+    final_images = faces.merge_faces(cloak_perturbation)

-def parse_arguments(argv):
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--gpu', type=str,
-                        help='GPU id', default='0')
-    parser.add_argument('--directory', type=str,
-                        help='directory that contain images for cloaking', default='imgs/')
-
-    parser.add_argument('--feature-extractor', type=str,
-                        help="name of the feature extractor used for optimization",
-                        default="webface_dense_robust_extract")
-
-    parser.add_argument('--th', type=float, default=0.005)
-    parser.add_argument('--sd', type=int, default=1e9)
-    parser.add_argument('--protect_class', type=str, default=None)
-    parser.add_argument('--lr', type=float, default=1)
-
-    parser.add_argument('--result_directory', type=str, default="../results")
-    parser.add_argument('--seperate_target', action='store_true')
-
-    return parser.parse_args(argv)
+    for p_img, cloaked_img, path in zip(final_images, protected_images, image_paths):
+        file_name = "{}_{}_{}_cloaked.{}".format(".".join(path.split(".")[:-1]), args.mode, args.th, args.format)
+        dump_image(p_img, file_name, format=args.format)


 if __name__ == '__main__':
-    args = parse_arguments(sys.argv[1:])
-    fawkes()
+    main(*sys.argv)
@@ -1,19 +1,30 @@
+import glob
+import gzip
 import json
 import os
 import pickle
 import random
+import sys

+stderr = sys.stderr
+sys.stderr = open(os.devnull, 'w')
 import keras
+
+sys.stderr = stderr
 import keras.backend as K
 import numpy as np
 import tensorflow as tf
-from keras.applications.vgg16 import preprocess_input
+from PIL import Image, ExifTags
+# from keras.applications.vgg16 import preprocess_input
 from keras.layers import Dense, Activation
 from keras.models import Model
 from keras.preprocessing import image
-from keras.utils import to_categorical
+from keras.utils import get_file
+from skimage.transform import resize
 from sklearn.metrics import pairwise_distances

+from .align_face import align, aligner
+

 def clip_img(X, preprocessing='raw'):
    X = reverse_preprocess(X, preprocessing)
@@ -22,6 +33,91 @@ def clip_img(X, preprocessing='raw'):
    return X


+def load_image(path):
+    """Load an image file as an RGB array, applying its EXIF orientation if present.
+
+    :param path: path of the image file, opened with ``PIL.Image.open``.
+    :return: the array returned by ``image.img_to_array`` for the RGB image.
+    """
+    # Angle handed to ``img.rotate`` for each EXIF orientation value that needs one.
+    rotations = {3: 180, 6: 270, 8: 90}
+
+    img = Image.open(path)
+
+    # `_getexif` is a private method that only some PIL image classes define;
+    # images without it, or without EXIF data, are left un-rotated.
+    read_exif = getattr(img, '_getexif', None)
+    raw_exif = read_exif() if read_exif is not None else None
+    if raw_exif is not None:
+        exif = dict(raw_exif.items())
+        # Numeric tag id whose name is 'Orientation' in PIL's tag table.
+        orientation_tag = next(
+            (tag for tag, tag_name in ExifTags.TAGS.items() if tag_name == 'Orientation'), None)
+        angle = rotations.get(exif.get(orientation_tag))
+        if angle is not None:
+            img = img.rotate(angle, expand=True)
+
+    img = img.convert('RGB')
+    image_array = image.img_to_array(img)
+
+    return image_array
+
+
+class Faces(object):
+    """Faces cropped from a set of images, plus the bookkeeping to paste cloaks back.
+
+    Attributes set by ``__init__``:
+      * ``org_faces``: the full images returned by ``load_image``, one per path.
+      * ``cropped_faces``: square-padded faces resized to 224x224 and passed
+        through ``preprocess(..., 'imagenet')``.
+      * ``cropped_faces_shape``: (height, width) of each face crop before padding.
+      * ``cropped_index``: second element of ``align``'s result for each face; it
+        is indexed as ``bb[0]..bb[3]`` when merging (presumably x1, y1, x2, y2 --
+        confirm against ``align``).
+      * ``callback_idx``: for each face, the index of the image it came from.
+      * ``cloaked_cropped_faces``: initialised to ``None``; not assigned inside this class.
+      * ``cloaked_faces``: copies of ``org_faces`` that receive the cloaks.
+    """
+
+    def __init__(self, image_paths, sess):
+        """Load every image in ``image_paths`` and collect the faces found in them.
+
+        :param image_paths: iterable of image file paths.
+        :param sess: session object handed to ``aligner``.
+        Exits the process with status 1 when no face is found in any image.
+        """
+        self.aligner = aligner(sess)
+        self.org_faces = []
+        self.cropped_faces = []
+        self.cropped_faces_shape = []
+        self.cropped_index = []
+        self.callback_idx = []
+        for i, p in enumerate(image_paths):
+            cur_img = load_image(p)
+            self.org_faces.append(cur_img)
+            align_img = align(cur_img, self.aligner, margin=0.7)
+            cur_faces = align_img[0]
+
+            cur_shapes = [f.shape[:-1] for f in cur_faces]
+
+            # Pad each crop with zeros (bottom/right) to a square so the resize
+            # below does not distort its aspect ratio.
+            cur_faces_square = []
+            for img in cur_faces:
+                long_size = max([img.shape[1], img.shape[0]])
+                base = np.zeros((long_size, long_size, 3))
+                base[0:img.shape[0], 0:img.shape[1], :] = img
+                cur_faces_square.append(base)
+
+            cur_index = align_img[1]
+            cur_faces_square = [resize(f, (224, 224)) for f in cur_faces_square]
+            self.cropped_faces_shape.extend(cur_shapes)
+            self.cropped_faces.extend(cur_faces_square)
+            self.cropped_index.extend(cur_index)
+            self.callback_idx.extend([i] * len(cur_faces_square))
+
+        if not self.cropped_faces:
+            print("No faces detected")
+            # sys.exit rather than the bare `exit` helper, which only exists when
+            # the `site` module has been loaded.
+            sys.exit(1)
+
+        self.cropped_faces = np.array(self.cropped_faces)
+
+        self.cropped_faces = preprocess(self.cropped_faces, 'imagenet')
+
+        self.cloaked_cropped_faces = None
+        self.cloaked_faces = self._copy_org_faces()
+
+    def _copy_org_faces(self):
+        """Return independent copies of ``org_faces`` as an array.
+
+        Same-shaped images are stacked into one numeric array, exactly as
+        ``np.copy`` of the list would. Images of different shapes go into a 1-D
+        object array, each element its own copy, so later in-place edits never
+        reach ``org_faces``.
+        """
+        copies = [np.copy(face) for face in self.org_faces]
+        if len({c.shape for c in copies}) <= 1:
+            return np.array(copies)
+        holder = np.empty(len(copies), dtype=object)
+        for pos, face_copy in enumerate(copies):
+            holder[pos] = face_copy
+        return holder
+
+    def get_faces(self):
+        """Return the preprocessed, cropped faces."""
+        return self.cropped_faces
+
+    def merge_faces(self, cloaks):
+        """Add each face's cloak onto a fresh copy of the image it came from.
+
+        :param cloaks: per-face perturbations, indexed like ``cropped_faces``.
+        :return: array of the original images with the cloaks added.
+        """
+        self.cloaked_faces = self._copy_org_faces()
+
+        for i in range(len(self.cropped_faces)):
+            cur_cloak = cloaks[i]
+            org_shape = self.cropped_faces_shape[i]
+            # Undo the square padding: resize to the padded square, then crop
+            # back to the face's original height and width.
+            old_square_shape = max([org_shape[0], org_shape[1]])
+            reshape_cloak = resize(cur_cloak, (old_square_shape, old_square_shape))
+            reshape_cloak = reshape_cloak[0:org_shape[0], 0:org_shape[1], :]
+
+            callback_id = self.callback_idx[i]
+            bb = self.cropped_index[i]
+            # NOTE(review): assumes the bb slice has the same shape as the crop -- confirm against `align`.
+            self.cloaked_faces[callback_id][bb[1]:bb[3], bb[0]:bb[2], :] += reshape_cloak
+
+        return self.cloaked_faces
+
+
 def dump_dictionary_as_json(dict, outfile):
    j = json.dumps(dict)
    with open(outfile, "wb") as f:
@@ -30,6 +126,8 @@ def dump_dictionary_as_json(dict, outfile):

 def fix_gpu_memory(mem_fraction=1):
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+    tf_config = None
+    if tf.test.is_gpu_available():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_fraction)
        tf_config = tf.ConfigProto(gpu_options=gpu_options)
        tf_config.gpu_options.allow_growth = True
@@ -45,7 +143,6 @@ def load_victim_model(number_classes, teacher_model=None, end2end=False):
    for l in teacher_model.layers:
        l.trainable = end2end
    x = teacher_model.layers[-1].output
-
    x = Dense(number_classes)(x)
    x = Activation('softmax', name="act")(x)
    model = Model(teacher_model.input, x)
@@ -141,6 +238,7 @@ def imagenet_preprocessing(x, data_format=None):

    return x

+
 def imagenet_reverse_preprocessing(x, data_format=None):
    import keras.backend as K
    x = np.array(x)
@@ -185,7 +283,20 @@ def build_bottleneck_model(model, cut_off):


 def load_extractor(name):
-    model = keras.models.load_model("../feature_extractors/{}.h5".format(name))
+    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
+    os.makedirs(model_dir, exist_ok=True)
+    model_file = os.path.join(model_dir, "{}.h5".format(name))
+    if os.path.exists(model_file):
+        model = keras.models.load_model(model_file)
+    else:
+        get_file("{}.h5".format(name), "http://sandlab.cs.uchicago.edu/fawkes/files/{}.h5".format(name),
+                 cache_dir=model_dir, cache_subdir='')
+
+        get_file("{}_emb.p.gz".format(name), "http://sandlab.cs.uchicago.edu/fawkes/files/{}_emb.p.gz".format(name),
+                 cache_dir=model_dir, cache_subdir='')
+
+        model = keras.models.load_model(model_file)
+
    if hasattr(model.layers[-1], "activation") and model.layers[-1].activation == "softmax":
        raise Exception(
            "Given extractor's last layer is softmax, need to remove the top layers to make it into a feature extractor")
@@ -200,10 +311,11 @@ def load_extractor(name):


 def get_dataset_path(dataset):
-    if not os.path.exists("config.json"):
+    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
+    if not os.path.exists(os.path.join(model_dir, "config.json")):
        raise Exception("Please config the datasets before running protection code. See more in README and config.py.")

-    config = json.load(open("config.json", 'r'))
+    config = json.load(open(os.path.join(model_dir, "config.json"), 'r'))
    if dataset not in config:
        raise Exception(
            "Dataset {} does not exist, please download to data/ and add the path to this function... Abort".format(
@@ -217,7 +329,8 @@ def normalize(x):


 def dump_image(x, filename, format="png", scale=False):
-    img = image.array_to_img(x, scale=scale)
+    # img = image.array_to_img(x, scale=scale)
+    img = image.array_to_img(x)
    img.save(filename, format)
    return

@@ -231,13 +344,17 @@ def load_dir(path):
        im = image.img_to_array(im)
        x_ls.append(im)
    raw_x = np.array(x_ls)
-    return preprocess_input(raw_x)
+    return preprocess(raw_x, 'imagenet')


 def load_embeddings(feature_extractors_names):
+    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
    dictionaries = []
    for extractor_name in feature_extractors_names:
-        path2emb = pickle.load(open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb"))
+        fp = gzip.open(os.path.join(model_dir, "{}_emb.p.gz".format(extractor_name)), 'rb')
+        path2emb = pickle.load(fp)
+        fp.close()
+
        dictionaries.append(path2emb)

    merge_dict = {}
@@ -272,6 +389,8 @@ def calculate_dist_score(a, b, feature_extractors_ls, metric='l2'):


 def select_target_label(imgs, feature_extractors_ls, feature_extractors_names, metric='l2'):
+    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
+
    original_feature_x = extractor_ls_predict(feature_extractors_ls, imgs)

    path2emb = load_embeddings(feature_extractors_names)
@@ -282,178 +401,174 @@ def select_target_label(imgs, feature_extractors_ls, feature_extractors_names, m

    pair_dist = pairwise_distances(original_feature_x, embs, metric)
    max_sum = np.min(pair_dist, axis=0)
-    sorted_idx = np.argsort(max_sum)[::-1]
+    max_id = np.argmax(max_sum)

-    highest_num = 0
-    paired_target_X = None
-    final_target_class_path = None
-    for idx in sorted_idx[:1]:
-        target_class_path = paths[idx]
-        cur_target_X = load_dir(target_class_path)
-        cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X])
-        cur_tot_sum, cur_paired_target_X = calculate_dist_score(imgs, cur_target_X,
-                                                                feature_extractors_ls,
-                                                                metric=metric)
-        if cur_tot_sum > highest_num:
-            highest_num = cur_tot_sum
-            paired_target_X = cur_paired_target_X
+    target_data_id = paths[int(max_id)]
+    image_dir = os.path.join(model_dir, "target_data/{}/*".format(target_data_id))
+    if not os.path.exists(image_dir):
+        get_file("{}.h5".format(name), "http://sandlab.cs.uchicago.edu/fawkes/files/target_images".format(name),
+                 cache_dir=model_dir, cache_subdir='')

-    np.random.shuffle(paired_target_X)
-    paired_target_X = list(paired_target_X)
-    while len(paired_target_X) < len(imgs):
-        paired_target_X += paired_target_X
+    image_paths = glob.glob(image_dir)

-    paired_target_X = paired_target_X[:len(imgs)]
-    return np.array(paired_target_X)
+    target_images = [image.img_to_array(image.load_img(cur_path)) for cur_path in
+                     image_paths]

+    target_images = np.array([resize(x, (224, 224)) for x in target_images])
+    target_images = preprocess(target_images, 'imagenet')

+    target_images = list(target_images)
+    while len(target_images) < len(imgs):
+        target_images += target_images

-class CloakData(object):
-    def __init__(self, protect_directory=None, img_shape=(224, 224)):
+    target_images = random.sample(target_images, len(imgs))
+    return np.array(target_images)

-        self.img_shape = img_shape
-
-        # self.train_data_dir, self.test_data_dir, self.number_classes, self.number_samples = get_dataset_path(dataset)
-        # self.all_labels = sorted(list(os.listdir(self.train_data_dir)))
-        self.protect_directory = protect_directory
-
-        self.protect_X = self.load_label_data(self.protect_directory)
-
-        self.cloaked_protect_train_X = None
-
-        self.label2path_train, self.label2path_test, self.path2idx = self.build_data_mapping()
-        self.all_training_path = self.get_all_data_path(self.label2path_train)
-        self.all_test_path = self.get_all_data_path(self.label2path_test)
-        self.protect_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.protect_class))
-
-    def get_class_image_files(self, path):
-        return [os.path.join(path, f) for f in os.listdir(path)]
-
-    def extractor_ls_predict(self, feature_extractors_ls, X):
-        feature_ls = []
-        for extractor in feature_extractors_ls:
-            cur_features = extractor.predict(X)
-            feature_ls.append(cur_features)
-        concated_feature_ls = np.concatenate(feature_ls, axis=1)
-        concated_feature_ls = normalize(concated_feature_ls)
-        return concated_feature_ls
-
-    def load_embeddings(self, feature_extractors_names):
-        dictionaries = []
-        for extractor_name in feature_extractors_names:
-            path2emb = pickle.load(open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb"))
-            dictionaries.append(path2emb)
-
-        merge_dict = {}
-        for k in dictionaries[0].keys():
-            cur_emb = [dic[k] for dic in dictionaries]
-            merge_dict[k] = np.concatenate(cur_emb)
-        return merge_dict
-
-    def select_target_label(self, feature_extractors_ls, feature_extractors_names, metric='l2'):
-        original_feature_x = self.extractor_ls_predict(feature_extractors_ls, self.protect_train_X)
-
-        path2emb = self.load_embeddings(feature_extractors_names)
-        items = list(path2emb.items())
-        paths = [p[0] for p in items]
-        embs = [p[1] for p in items]
-        embs = np.array(embs)
-
-        pair_dist = pairwise_distances(original_feature_x, embs, metric)
-        max_sum = np.min(pair_dist, axis=0)
-        sorted_idx = np.argsort(max_sum)[::-1]
-
-        highest_num = 0
-        paired_target_X = None
-        final_target_class_path = None
-        for idx in sorted_idx[:5]:
-            target_class_path = paths[idx]
-            cur_target_X = self.load_dir(target_class_path)
-            cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X])
-            cur_tot_sum, cur_paired_target_X = self.calculate_dist_score(self.protect_train_X, cur_target_X,
-                                                                         feature_extractors_ls,
-                                                                         metric=metric)
-            if cur_tot_sum > highest_num:
-                highest_num = cur_tot_sum
-                paired_target_X = cur_paired_target_X
-                final_target_class_path = target_class_path
-
-        np.random.shuffle(paired_target_X)
-        return final_target_class_path, paired_target_X
-
-    def calculate_dist_score(self, a, b, feature_extractors_ls, metric='l2'):
-        features1 = self.extractor_ls_predict(feature_extractors_ls, a)
-        features2 = self.extractor_ls_predict(feature_extractors_ls, b)
-
-        pair_cos = pairwise_distances(features1, features2, metric)
-        max_sum = np.min(pair_cos, axis=0)
-        max_sum_arg = np.argsort(max_sum)[::-1]
-        max_sum_arg = max_sum_arg[:len(a)]
-        max_sum = [max_sum[i] for i in max_sum_arg]
-        paired_target_X = [b[j] for j in max_sum_arg]
-        paired_target_X = np.array(paired_target_X)
-        return np.min(max_sum), paired_target_X
-
-    def get_all_data_path(self, label2path):
-        all_paths = []
-        for k, v in label2path.items():
-            cur_all_paths = [os.path.join(k, cur_p) for cur_p in v]
-            all_paths.extend(cur_all_paths)
-        return all_paths
-
-    def load_label_data(self, label):
-        train_label_path = os.path.join(self.train_data_dir, label)
-        test_label_path = os.path.join(self.test_data_dir, label)
-        train_X = self.load_dir(train_label_path)
-        test_X = self.load_dir(test_label_path)
-        return train_X, test_X
-
-    def load_dir(self, path):
-        assert os.path.exists(path)
-        x_ls = []
-        for file in os.listdir(path):
-            cur_path = os.path.join(path, file)
-            im = image.load_img(cur_path, target_size=self.img_shape)
-            im = image.img_to_array(im)
-            x_ls.append(im)
-        raw_x = np.array(x_ls)
-        return preprocess_input(raw_x)
-
-    def build_data_mapping(self):
-        label2path_train = {}
-        label2path_test = {}
-        idx = 0
-        path2idx = {}
-        for label_name in self.all_labels:
-            full_path_train = os.path.join(self.train_data_dir, label_name)
-            full_path_test = os.path.join(self.test_data_dir, label_name)
-            label2path_train[full_path_train] = list(os.listdir(full_path_train))
-            label2path_test[full_path_test] = list(os.listdir(full_path_test))
-            for img_file in os.listdir(full_path_train):
-                path2idx[os.path.join(full_path_train, img_file)] = idx
-            for img_file in os.listdir(full_path_test):
-                path2idx[os.path.join(full_path_test, img_file)] = idx
-            idx += 1
-        return label2path_train, label2path_test, path2idx
-
-    def generate_data_post_cloak(self, sybil=False):
-        assert self.cloaked_protect_train_X is not None
-        while True:
-            batch_X = []
-            batch_Y = []
-            cur_batch_path = random.sample(self.all_training_path, 32)
-            for p in cur_batch_path:
-                cur_y = self.path2idx[p]
-                if p in self.protect_class_path:
-                    cur_x = random.choice(self.cloaked_protect_train_X)
-                elif sybil and (p in self.sybil_class):
-                    cur_x = random.choice(self.cloaked_sybil_train_X)
-                else:
-                    im = image.load_img(p, target_size=self.img_shape)
-                    im = image.img_to_array(im)
-                    cur_x = preprocess_input(im)
-                batch_X.append(cur_x)
-                batch_Y.append(cur_y)
-            batch_X = np.array(batch_X)
-            batch_Y = to_categorical(np.array(batch_Y), num_classes=self.number_classes)
-            yield batch_X, batch_Y
+# class CloakData(object):
+#     def __init__(self, protect_directory=None, img_shape=(224, 224)):
+#
+#         self.img_shape = img_shape
+#         # self.train_data_dir, self.test_data_dir, self.number_classes, self.number_samples = get_dataset_path(dataset)
+#         # self.all_labels = sorted(list(os.listdir(self.train_data_dir)))
+#         self.protect_directory = protect_directory
+#
+#         self.protect_X = self.load_label_data(self.protect_directory)
+#
+#         self.cloaked_protect_train_X = None
+#
+#         self.label2path_train, self.label2path_test, self.path2idx = self.build_data_mapping()
+#         self.all_training_path = self.get_all_data_path(self.label2path_train)
+#         self.all_test_path = self.get_all_data_path(self.label2path_test)
+#         self.protect_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.protect_class))
+#
+#     def get_class_image_files(self, path):
+#         return [os.path.join(path, f) for f in os.listdir(path)]
+#
+#     def extractor_ls_predict(self, feature_extractors_ls, X):
+#         feature_ls = []
+#         for extractor in feature_extractors_ls:
+#             cur_features = extractor.predict(X)
+#             feature_ls.append(cur_features)
+#         concated_feature_ls = np.concatenate(feature_ls, axis=1)
+#         concated_feature_ls = normalize(concated_feature_ls)
+#         return concated_feature_ls
+#
+#     def load_embeddings(self, feature_extractors_names):
+#         dictionaries = []
+#         for extractor_name in feature_extractors_names:
+#             path2emb = pickle.load(open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb"))
+#             dictionaries.append(path2emb)
+#
+#         merge_dict = {}
+#         for k in dictionaries[0].keys():
+#             cur_emb = [dic[k] for dic in dictionaries]
+#             merge_dict[k] = np.concatenate(cur_emb)
+#         return merge_dict
+#
+#     def select_target_label(self, feature_extractors_ls, feature_extractors_names, metric='l2'):
+#         original_feature_x = self.extractor_ls_predict(feature_extractors_ls, self.protect_train_X)
+#
+#         path2emb = self.load_embeddings(feature_extractors_names)
+#         items = list(path2emb.items())
+#         paths = [p[0] for p in items]
+#         embs = [p[1] for p in items]
+#         embs = np.array(embs)
+#
+#         pair_dist = pairwise_distances(original_feature_x, embs, metric)
+#         max_sum = np.min(pair_dist, axis=0)
+#         sorted_idx = np.argsort(max_sum)[::-1]
+#
+#         highest_num = 0
+#         paired_target_X = None
+#         final_target_class_path = None
+#         for idx in sorted_idx[:5]:
+#             target_class_path = paths[idx]
+#             cur_target_X = self.load_dir(target_class_path)
+#             cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X])
+#             cur_tot_sum, cur_paired_target_X = self.calculate_dist_score(self.protect_train_X, cur_target_X,
+#                                                                          feature_extractors_ls,
+#                                                                          metric=metric)
+#             if cur_tot_sum > highest_num:
+#                 highest_num = cur_tot_sum
+#                 paired_target_X = cur_paired_target_X
+#                 final_target_class_path = target_class_path
+#
+#         np.random.shuffle(paired_target_X)
+#         return final_target_class_path, paired_target_X
+#
+#     def calculate_dist_score(self, a, b, feature_extractors_ls, metric='l2'):
+#         features1 = self.extractor_ls_predict(feature_extractors_ls, a)
+#         features2 = self.extractor_ls_predict(feature_extractors_ls, b)
+#
+#         pair_cos = pairwise_distances(features1, features2, metric)
+#         max_sum = np.min(pair_cos, axis=0)
+#         max_sum_arg = np.argsort(max_sum)[::-1]
+#         max_sum_arg = max_sum_arg[:len(a)]
+#         max_sum = [max_sum[i] for i in max_sum_arg]
+#         paired_target_X = [b[j] for j in max_sum_arg]
+#         paired_target_X = np.array(paired_target_X)
+#         return np.min(max_sum), paired_target_X
+#
+#     def get_all_data_path(self, label2path):
+#         all_paths = []
+#         for k, v in label2path.items():
+#             cur_all_paths = [os.path.join(k, cur_p) for cur_p in v]
+#             all_paths.extend(cur_all_paths)
+#         return all_paths
+#
+#     def load_label_data(self, label):
+#         train_label_path = os.path.join(self.train_data_dir, label)
+#         test_label_path = os.path.join(self.test_data_dir, label)
+#         train_X = self.load_dir(train_label_path)
+#         test_X = self.load_dir(test_label_path)
+#         return train_X, test_X
+#
+#     def load_dir(self, path):
+#         assert os.path.exists(path)
+#         x_ls = []
+#         for file in os.listdir(path):
+#             cur_path = os.path.join(path, file)
+#             im = image.load_img(cur_path, target_size=self.img_shape)
+#             im = image.img_to_array(im)
+#             x_ls.append(im)
+#         raw_x = np.array(x_ls)
+#         return preprocess_input(raw_x)
+#
+#     def build_data_mapping(self):
+#         label2path_train = {}
+#         label2path_test = {}
+#         idx = 0
+#         path2idx = {}
+#         for label_name in self.all_labels:
+#             full_path_train = os.path.join(self.train_data_dir, label_name)
+#             full_path_test = os.path.join(self.test_data_dir, label_name)
+#             label2path_train[full_path_train] = list(os.listdir(full_path_train))
+#             label2path_test[full_path_test] = list(os.listdir(full_path_test))
+#             for img_file in os.listdir(full_path_train):
+#                 path2idx[os.path.join(full_path_train, img_file)] = idx
+#             for img_file in os.listdir(full_path_test):
+#                 path2idx[os.path.join(full_path_test, img_file)] = idx
+#             idx += 1
+#         return label2path_train, label2path_test, path2idx
+#
+#     def generate_data_post_cloak(self, sybil=False):
+#         assert self.cloaked_protect_train_X is not None
+#         while True:
+#             batch_X = []
+#             batch_Y = []
+#             cur_batch_path = random.sample(self.all_training_path, 32)
+#             for p in cur_batch_path:
+#                 cur_y = self.path2idx[p]
+#                 if p in self.protect_class_path:
+#                     cur_x = random.choice(self.cloaked_protect_train_X)
+#                 elif sybil and (p in self.sybil_class):
+#                     cur_x = random.choice(self.cloaked_sybil_train_X)
+#                 else:
+#                     im = image.load_img(p, target_size=self.img_shape)
+#                     im = image.img_to_array(im)
+#                     cur_x = preprocess_input(im)
+#                 batch_X.append(cur_x)
+#                 batch_Y.append(cur_y)
+#             batch_X = np.array(batch_X)
+#             batch_Y = to_categorical(np.array(batch_Y), num_classes=self.number_classes)
+#             yield batch_X, batch_Y
@@ -1,23 +1,117 @@
-import setuptools
+import os
+import re
+import sys

+from setuptools import setup, Command
+
+__PATH__ = os.path.abspath(os.path.dirname(__file__))

 with open("README.md", "r") as fh:
    long_description = fh.read()

-setuptools.setup(
-    name="fawkes",
-    version="0.0.1",
-    author="Shawn Shan",
-    author_email="shansixiong@cs.uchicago.edu",
-    description="Fawkes protect user privacy",
+
+def read_version():
+    """Return the version string declared in ``fawkes/__init__.py``.
+
+    The file is read as text and searched for a line of the form
+    ``__version__ = '<value>'``; the package itself is not imported.
+
+    Returns:
+        str: the text between the quotes of the ``__version__`` assignment.
+
+    Raises:
+        RuntimeError: if no ``__version__`` assignment is found.
+    """
+    here = os.path.abspath(os.path.dirname(__file__))
+    init_path = os.path.join(here, 'fawkes', '__init__.py')
+    # Decode explicitly as UTF-8 so the result does not depend on the
+    # locale's default encoding of the machine running setup.py.
+    with open(init_path, encoding='utf-8') as f:
+        version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
+                                  f.read(), re.M)
+    if version_match:
+        return version_match.group(1)
+    raise RuntimeError("Unable to find __version__ string")
+
+
+__version__ = read_version()
+
+
+# brought from https://github.com/kennethreitz/setup.py
+class DeployCommand(Command):
+    """``setup.py deploy``: build an sdist, upload it with Twine, tag the release.
+
+    The command refuses to run on a ``dev`` version or when ``git status``
+    reports uncommitted changes, and always terminates the process through
+    ``sys.exit`` (status 0 on success, 1 on any failed step).
+    """
+    description = 'Build and deploy the package to PyPI.'
+    user_options = []
+
+    def initialize_options(self):
+        """Nothing to initialise: the command defines no options."""
+        pass
+
+    def finalize_options(self):
+        """Nothing to finalise: the command defines no options."""
+        pass
+
+    @staticmethod
+    def status(s):
+        """Print the progress message ``s``."""
+        print(s)
+
+    def run(self):
+        """Run the release steps in order, aborting at the first failure."""
+
+        assert 'dev' not in __version__, (
+            "Only non-devel versions are allowed. "
+            "__version__ == {}".format(__version__))
+
+        with os.popen("git status --short") as fp:
+            git_status = fp.read().strip()
+        if git_status:
+            print("Error: git repository is not clean.\n")
+            os.system("git status --short")
+            sys.exit(1)
+
+        try:
+            from shutil import rmtree
+            self.status('Removing previous builds ...')
+            rmtree(os.path.join(__PATH__, 'dist'))
+        except OSError:
+            # Best effort: there may be no previous build directory to remove.
+            pass
+
+        # Only an sdist is produced, so the message says so; a failed build
+        # must stop the release instead of falling through to the upload.
+        self.status('Building source distribution ...')
+        if os.system('{0} setup.py sdist'.format(sys.executable)) != 0:
+            print("Error: building the source distribution failed.")
+            sys.exit(1)
+
+        self.status('Uploading the package to PyPI via Twine ...')
+        # os.system() returns a platform-specific wait status, not an exit
+        # code.  On POSIX a child exit code of 1 comes back as 256, and
+        # sys.exit(256) is truncated to process status 0, so exit with a
+        # fixed non-zero code instead of the raw value.
+        if os.system('twine upload dist/*') != 0:
+            sys.exit(1)
+
+        self.status('Creating git tags ...')
+        os.system('git tag v{0}'.format(__version__))
+        os.system('git tag --list')
+        sys.exit()
+
+
+setup_requires = []
+
+# Runtime dependencies installed together with the package.
+# argparse is intentionally not listed: it is part of the standard library
+# on every interpreter this package supports (python_requires '>=3.5'), and
+# naming it here would install the obsolete PyPI backport.
+install_requires = [
+    'numpy>=1.16.4',
+    'tensorflow>=1.13.1',
+    'keras==2.2.5',
+    'scikit-image',
+    'pillow>=7.0.0',
+    'opencv-python>=4.2.0.34',
+]
+
+setup(
+    name='fawkes',
+    version=__version__,
+    license='MIT',
+    description='An utility to protect user privacy',
    long_description=long_description,
-    long_description_content_type="text/markdown",
+    long_description_content_type='text/markdown',
    url="https://github.com/Shawn-Shan/fawkes",
-    packages=setuptools.find_packages(),
+    author='Shawn Shan',
+    author_email='shansixiong@cs.uchicago.edu',
+    keywords='fawkes privacy clearview',
    classifiers=[
-        "Programming Language :: Python :: 3",
-        "License :: OSI Approved :: MIT License",
+        'Development Status :: 3 - Alpha',
+        'License :: OSI Approved :: MIT License',
        "Operating System :: OS Independent",
+        'Programming Language :: Python :: 3',
+        'Topic :: System :: Monitoring',
    ],
+    packages=['fawkes'],
+    install_requires=install_requires,
+    setup_requires=setup_requires,
+    entry_points={
+        'console_scripts': ['fawkes=fawkes:main'],
+    },
+    cmdclass={
+        'deploy': DeployCommand,
+    },
+    include_package_data=True,
+    zip_safe=False,
    python_requires='>=3.5',
 )