diff --git a/fawkes/config.py b/fawkes/config.py
new file mode 100644
index 0000000..70a9e77
--- /dev/null
+++ b/fawkes/config.py
@@ -0,0 +1,42 @@
+import glob
+import json
+import os
+
+DATASETS = {
+    "pubfig": "../data/pubfig/",
+    "scrub": "../data/scrub/",
+    "vggface1": "/mnt/data/sixiongshan/data/vggface/",
+    # "vggface2": "/mnt/data/sixiongshan/data/vggface2/",
+    "webface": "/mnt/data/sixiongshan/data/webface/",
+    # "youtubeface": "/mnt/data/sixiongshan/data/youtubeface/keras_flow_data/",
+}
+
+
+
+def main():
+    config = {}
+    for dataset in DATASETS.keys():
+        path = DATASETS[dataset]
+        if not os.path.exists(path):
+            print("Dataset path for {} does not exist, skipped".format(dataset))
+            continue
+        train_dir = os.path.join(path, "train")
+        test_dir = os.path.join(path, "test")
+        if not os.path.exists(train_dir):
+            print("Training dataset path for {} does not exist, skipped".format(dataset))
+            continue
+        num_classes = len(os.listdir(train_dir))
+        num_images = len(glob.glob(os.path.join(train_dir, "*/*")))
+        if num_images == 0 or num_classes == 0 or num_images == num_classes:
+            raise Exception("Dataset {} is not setup as detailed in README.".format(dataset))
+
+        config[dataset] = {"train_dir": train_dir, "test_dir": test_dir, "num_classes": num_classes,
+                           "num_images": num_images}
+        print("Successfully config {}".format(dataset))
+    j = json.dumps(config)
+    with open("config.json", "wb") as f:
+        f.write(j.encode())
+
+
+if __name__ == '__main__':
+    main()
diff --git a/fawkes/differentiator.py b/fawkes/differentiator.py
index f788bd5..38dfada 100644
--- a/fawkes/differentiator.py
+++ b/fawkes/differentiator.py
@@ -7,6 +7,7 @@ import datetime
 import time
 from decimal import Decimal
+
 import numpy as np
 import tensorflow as tf
 from utils import preprocess, reverse_preprocess
 
diff --git a/fawkes/eval_cloak.py b/fawkes/eval_cloak.py
index 474f8ad..7c2b581 100644
--- a/fawkes/eval_cloak.py
+++ b/fawkes/eval_cloak.py
@@ -102,9 +102,6 @@ def main():
     else:
         raise ValueError
 
-    CLOAK_DIR = CLOAK_DIR + "_th{}_sd{}".format(args.th, int(args.sd))
-    print(CLOAK_DIR)
-
     CLOAK_DIR = os.path.join("../results", CLOAK_DIR)
 
     RES = pickle.load(open(os.path.join(CLOAK_DIR, "cloak_data.p"), 'rb'))
@@ -127,7 +124,7 @@
 
     try:
         model.fit_generator(train_generator, steps_per_epoch=cloak_data.number_samples // 32,
-                            validation_data=(original_X, original_Y), epochs=args.n_epochs, verbose=1,
+                            validation_data=(original_X, original_Y), epochs=args.n_epochs, verbose=2,
                             use_multiprocessing=False, workers=1)
     except KeyboardInterrupt:
         pass
@@ -144,7 +141,8 @@
     print("Accuracy on other classes {:.4f}".format(other_acc))
     EVAL_RES['other_acc'] = other_acc
     dump_dictionary_as_json(EVAL_RES,
-                            os.path.join(CLOAK_DIR, "eval_seed{}_th{}.json".format(args.seed_idx, args.th)))
+                            os.path.join(CLOAK_DIR,
+                                         "eval_seed{}_th{}_sd{}.json".format(args.seed_idx, args.th, args.sd)))
 
 
 def parse_arguments(argv):
@@ -158,7 +156,7 @@
                         help='name of dataset', default='scrub')
     parser.add_argument('--cloak_data', type=str,
                         help='name of the cloak result directory',
-                        default='scrub_webface_dense_robust_protectPatrick_Dempsey')
+                        default='scrub_webface_dense_robust_protectKristen_Alderson')
 
     parser.add_argument('--sd', type=int, default=1e6)
     parser.add_argument('--th', type=float, default=0.01)
@@ -167,7 +165,7 @@
                         help='student model',
                         default='../feature_extractors/vggface2_inception_extract.h5')
     parser.add_argument('--batch_size', type=int, default=32)
     parser.add_argument('--validation_split', type=float, default=0.1)
-    parser.add_argument('--n_epochs', type=int, default=3)
+    parser.add_argument('--n_epochs', type=int, default=5)
 
     return parser.parse_args(argv)
diff --git a/fawkes/prepare_feature_extractor.py b/fawkes/prepare_feature_extractor.py
new file mode 100644
index 0000000..a4d2bf4
--- /dev/null
+++ b/fawkes/prepare_feature_extractor.py
@@ -0,0 +1,64 @@
+import argparse
+import os
+import pickle
+import random
+import sys
+
+import numpy as np
+from keras.applications.vgg16 import preprocess_input
+from keras.preprocessing import image
+from utils import load_extractor, get_dataset_path
+
+
+def load_sample_dir(path, sample=10):
+    x_ls = []
+    image_paths = list(os.listdir(path))
+    random.shuffle(image_paths)
+    for i, file in enumerate(image_paths):
+        if i > sample:
+            break
+        cur_path = os.path.join(path, file)
+        im = image.load_img(cur_path, target_size=(224, 224))
+        im = image.img_to_array(im)
+        x_ls.append(im)
+    raw_x = np.array(x_ls)
+    return preprocess_input(raw_x)
+
+
+def normalize(x):
+    return x / np.linalg.norm(x)
+
+
+def main():
+    extractor = load_extractor(args.feature_extractor)
+    path2emb = {}
+    for target_dataset in args.candidate_datasets:
+        target_dataset_path, _, _, _ = get_dataset_path(target_dataset)
+        for target_class in os.listdir(target_dataset_path):
+            target_class_path = os.path.join(target_dataset_path, target_class)
+            target_X = load_sample_dir(target_class_path)
+            cur_feature = extractor.predict(target_X)
+            cur_feature = np.mean(cur_feature, axis=0)
+            path2emb[target_class_path] = cur_feature
+
+    for k, v in path2emb.items():
+        path2emb[k] = normalize(v)
+
+    pickle.dump(path2emb, open("../feature_extractors/embeddings/{}_emb_norm.p".format(args.feature_extractor), "wb"))
+
+
+def parse_arguments(argv):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--gpu', type=str,
+                        help='GPU id', default='0')
+    parser.add_argument('--candidate-datasets', nargs='+',
+                        help='path candidate datasets')
+    parser.add_argument('--feature-extractor', type=str,
+                        help="name of the feature extractor used for optimization",
+                        default="webface_dense_robust_extract")
+    return parser.parse_args(argv)
+
+
+if __name__ == '__main__':
+    args = parse_arguments(sys.argv[1:])
+    main()
diff --git a/fawkes/protection.py b/fawkes/protection.py
index 4e28531..fc99cb2 100644
--- a/fawkes/protection.py
+++ b/fawkes/protection.py
@@ -9,13 +9,12 @@ from differentiator import FawkesMaskGeneration
 from tensorflow import set_random_seed
 from utils import load_extractor, CloakData, init_gpu
 
-# random.seed(12243)
 np.random.seed(122412)
 set_random_seed(12242)
 
-NUM_IMG_PROTECTED = 10  # Number of images used to optimize the target class
-BATCH_SIZE = 10
+NUM_IMG_PROTECTED = 32  # Number of images used to optimize the target class
+BATCH_SIZE = 32
 
 IMG_SHAPE = [224, 224, 3]
 
 
@@ -53,16 +52,13 @@ def perform_defense():
     num_protect = NUM_IMG_PROTECTED
 
     print("Loading {} for optimization".format(args.feature_extractor))
-    feature_extractors_ls = [load_extractor(name, layer_idx=args.layer_idx) for name in FEATURE_EXTRACTORS]
+    feature_extractors_ls = [load_extractor(name) for name in FEATURE_EXTRACTORS]
     protect_class = args.protect_class
     cloak_data = CloakData(args.dataset, target_selection_tries=1, protect_class=protect_class)
     model_name = args.feature_extractor.split("/")[-1].split('.')[0].replace("_extract", "")
-    RES_FILE_NAME = "{}_{}_protect{}_th{}_sd{}".format(args.dataset, model_name, cloak_data.protect_class, args.th,
-                                                       args.sd)
+    RES_FILE_NAME = "{}_{}_protect{}".format(args.dataset, model_name, cloak_data.protect_class)
     RES_FILE_NAME = os.path.join(RES_DIR, RES_FILE_NAME)
-    if os.path.exists(RES_FILE_NAME):
-        exit(1)
 
     print("Protect Class: ", cloak_data.protect_class)
 
     cloak_data.target_path, cloak_data.target_data = cloak_data.select_target_label(feature_extractors_ls,
@@ -88,13 +84,9 @@
                         help='name of dataset', default='scrub')
     parser.add_argument('--feature-extractor', type=str,
                         help="name of the feature extractor used for optimization",
-                        default="../feature_extractors/webface_dense_robust_extract.h5")
-    parser.add_argument('--layer-idx', type=int,
-                        help="the idx of the layer of neuron that are used as feature space",
-                        default=-3)
-
-    parser.add_argument('--th', type=float, default=0.01)
-    parser.add_argument('--sd', type=int, default=1e4)
+                        default="webface_dense_robust")
+    parser.add_argument('--th', type=float, default=0.007)
+    parser.add_argument('--sd', type=int, default=1e5)
 
     parser.add_argument('--protect_class', type=str, default=None)
     parser.add_argument('--lr', type=float, default=0.1)
diff --git a/fawkes/utils.py b/fawkes/utils.py
index 430f353..b2a9fe3 100644
--- a/fawkes/utils.py
+++ b/fawkes/utils.py
@@ -68,7 +68,6 @@ def init_gpu(gpu_index, force=False):
 
 
 def preprocess(X, method):
-    # assume color last
     assert method in {'raw', 'imagenet', 'inception', 'mnist'}
 
     if method is 'raw':
@@ -82,7 +81,6 @@
 
 
 def reverse_preprocess(X, method):
-    # assume color last
     assert method in {'raw', 'imagenet', 'inception', 'mnist'}
 
     if method is 'raw':
@@ -146,13 +144,6 @@ def imagenet_preprocessing(x, data_format=None):
 
 def imagenet_reverse_preprocessing(x, data_format=None):
     import keras.backend as K
-    """ Reverse preprocesses a tensor encoding a batch of images.
-        # Arguments
-            x: input Numpy tensor, 4D.
-            data_format: data format of the image tensor.
-        # Returns
-            Preprocessed tensor.
-    """
     x = np.array(x)
     if data_format is None:
         data_format = K.image_data_format()
@@ -189,37 +180,32 @@ def build_bottleneck_model(model, cut_off):
     return bottleneck_model
 
 
-def load_extractor(name, layer_idx=None):
-    model = keras.models.load_model(name)
-
-    if "extract" in name.split("/")[-1]:
-        model = keras.models.load_model(name)
-    else:
-        print("Convert a model to a feature extractor")
-        model = build_bottleneck_model(model, model.layers[layer_idx].name)
-        model.save(name + "extract")
-        model = keras.models.load_model(name + "extract")
-
+def load_extractor(name):
+    model = keras.models.load_model("../feature_extractors/{}.h5".format(name))
+    if hasattr(model.layers[-1], "activation") and model.layers[-1].activation == "softmax":
+        raise Exception(
+            "Given extractor's last layer is softmax, need to remove the top layers to make it into a feature extractor")
+    # if "extract" in name.split("/")[-1]:
+    #     pass
+    # else:
+    #     print("Convert a model to a feature extractor")
+    #     model = build_bottleneck_model(model, model.layers[layer_idx].name)
+    #     model.save(name + "extract")
+    #     model = keras.models.load_model(name + "extract")
     return model
 
 
 def get_dataset_path(dataset):
-    if dataset == "scrub":
-        train_data_dir = '../data/scrub/train'
-        test_data_dir = '../data/scrub/test'
-        number_classes = 530
-        number_samples = 57838
-    elif dataset == "pubfig":
-        train_data_dir = '../data/pubfig/train'
-        test_data_dir = '../data/pubfig/test'
-        number_classes = 65
-        number_samples = 5979
-    else:
+    if not os.path.exists("config.json"):
+        raise Exception("Please config the datasets before running protection code. See more in README and config.py.")
+
+    config = json.load(open("config.json", 'r'))
+    if dataset not in config:
         raise Exception(
             "Dataset {} does not exist, please download to data/ and add the path to this function... Abort".format(
                 dataset))
-
-    return train_data_dir, test_data_dir, number_classes, number_samples
+    return config[dataset]['train_dir'], config[dataset]['test_dir'], config[dataset]['num_classes'], config[dataset][
+        'num_images']
 
 
 def normalize(x):
@@ -227,10 +213,9 @@
 
 
 class CloakData(object):
-    def __init__(self, dataset, img_shape=(224, 224), target_selection_tries=30, protect_class=None):
+    def __init__(self, dataset, img_shape=(224, 224), protect_class=None):
         self.dataset = dataset
         self.img_shape = img_shape
-        self.target_selection_tries = target_selection_tries
 
         self.train_data_dir, self.test_data_dir, self.number_classes, self.number_samples = get_dataset_path(dataset)
         self.all_labels = sorted(list(os.listdir(self.train_data_dir)))
@@ -269,7 +254,6 @@ class CloakData(object):
     def load_embeddings(self, feature_extractors_names):
         dictionaries = []
        for extractor_name in feature_extractors_names:
-            extractor_name = extractor_name.split("/")[-1].split('.')[0].replace("_extract", "")
             path2emb = pickle.load(open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb"))
             dictionaries.append(path2emb)
 
@@ -288,14 +272,14 @@ class CloakData(object):
             embs = [p[1] for p in items]
             embs = np.array(embs)
 
-            pair_dist = pairwise_distances(original_feature_x, embs, 'l2')
+            pair_dist = pairwise_distances(original_feature_x, embs, metric)
             max_sum = np.min(pair_dist, axis=0)
             sorted_idx = np.argsort(max_sum)[::-1]
 
             highest_num = 0
             paired_target_X = None
             final_target_class_path = None
-            for idx in sorted_idx[:2]:
+            for idx in sorted_idx[:5]:
                 target_class_path = paths[idx]
                 cur_target_X = self.load_dir(target_class_path)
                 cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X])