mirror of https://github.com/Shawn-Shan/fawkes.git synced 2024-09-20 07:26:37 +05:30

standardize feature extractors

This commit is contained in:
Shawn-Shan 2020-06-01 07:50:02 -07:00
parent de558e841e
commit 83bb798373
6 changed files with 141 additions and 60 deletions

fawkes/config.py Normal file
View File

@@ -0,0 +1,42 @@
import glob
import json
import os

DATASETS = {
    "pubfig": "../data/pubfig/",
    "scrub": "../data/scrub/",
    "vggface1": "/mnt/data/sixiongshan/data/vggface/",
    # "vggface2": "/mnt/data/sixiongshan/data/vggface2/",
    "webface": "/mnt/data/sixiongshan/data/webface/",
    # "youtubeface": "/mnt/data/sixiongshan/data/youtubeface/keras_flow_data/",
}


def main():
    config = {}
    for dataset in DATASETS.keys():
        path = DATASETS[dataset]
        if not os.path.exists(path):
            print("Dataset path for {} does not exist, skipped".format(dataset))
            continue
        train_dir = os.path.join(path, "train")
        test_dir = os.path.join(path, "test")
        if not os.path.exists(train_dir):
            print("Training dataset path for {} does not exist, skipped".format(dataset))
            continue
        num_classes = len(os.listdir(train_dir))
        num_images = len(glob.glob(os.path.join(train_dir, "*/*")))
        if num_images == 0 or num_classes == 0 or num_images == num_classes:
            raise Exception("Dataset {} is not setup as detailed in README.".format(dataset))
        config[dataset] = {"train_dir": train_dir, "test_dir": test_dir, "num_classes": num_classes,
                           "num_images": num_images}
        print("Successfully config {}".format(dataset))
    j = json.dumps(config)
    with open("config.json", "wb") as f:
        f.write(j.encode())


if __name__ == '__main__':
    main()
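
For reference (not part of the commit), a minimal sketch of the config.json this script writes and of the lookup that the reworked utils.get_dataset_path() further down in this diff performs against it. The scrub counts simply reuse the values previously hard-coded in utils.py; real values depend on the data present on disk.

import json

# Illustrative config.json entry, written by hand instead of by config.py.
example_config = {
    "scrub": {
        "train_dir": "../data/scrub/train",
        "test_dir": "../data/scrub/test",
        "num_classes": 530,      # value previously hard-coded in utils.py
        "num_images": 57838,     # value previously hard-coded in utils.py
    }
}

with open("config.json", "w") as f:
    json.dump(example_config, f)

# Mirrors the lookup the updated get_dataset_path() performs.
config = json.load(open("config.json", 'r'))
entry = config["scrub"]
print(entry["train_dir"], entry["test_dir"], entry["num_classes"], entry["num_images"])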

View File

@@ -7,6 +7,7 @@
 import datetime
 import time
 from decimal import Decimal
 import numpy as np
 import tensorflow as tf
 from utils import preprocess, reverse_preprocess

View File

@@ -102,9 +102,6 @@ def main():
     else:
         raise ValueError
-    CLOAK_DIR = CLOAK_DIR + "_th{}_sd{}".format(args.th, int(args.sd))
-    print(CLOAK_DIR)
     CLOAK_DIR = os.path.join("../results", CLOAK_DIR)
     RES = pickle.load(open(os.path.join(CLOAK_DIR, "cloak_data.p"), 'rb'))
@@ -127,7 +124,7 @@ def main():
     try:
         model.fit_generator(train_generator, steps_per_epoch=cloak_data.number_samples // 32,
-                            validation_data=(original_X, original_Y), epochs=args.n_epochs, verbose=1,
+                            validation_data=(original_X, original_Y), epochs=args.n_epochs, verbose=2,
                             use_multiprocessing=False, workers=1)
     except KeyboardInterrupt:
         pass
@@ -144,7 +141,8 @@ def main():
     print("Accuracy on other classes {:.4f}".format(other_acc))
     EVAL_RES['other_acc'] = other_acc
     dump_dictionary_as_json(EVAL_RES,
-                            os.path.join(CLOAK_DIR, "eval_seed{}_th{}.json".format(args.seed_idx, args.th)))
+                            os.path.join(CLOAK_DIR,
+                                         "eval_seed{}_th{}_sd{}.json".format(args.seed_idx, args.th, args.sd)))


 def parse_arguments(argv):
@@ -158,7 +156,7 @@ def parse_arguments(argv):
                         help='name of dataset', default='scrub')
     parser.add_argument('--cloak_data', type=str,
                         help='name of the cloak result directory',
-                        default='scrub_webface_dense_robust_protectPatrick_Dempsey')
+                        default='scrub_webface_dense_robust_protectKristen_Alderson')
     parser.add_argument('--sd', type=int, default=1e6)
     parser.add_argument('--th', type=float, default=0.01)
@@ -167,7 +165,7 @@ def parse_arguments(argv):
                         help='student model', default='../feature_extractors/vggface2_inception_extract.h5')
     parser.add_argument('--batch_size', type=int, default=32)
     parser.add_argument('--validation_split', type=float, default=0.1)
-    parser.add_argument('--n_epochs', type=int, default=3)
+    parser.add_argument('--n_epochs', type=int, default=5)
     return parser.parse_args(argv)

View File

@@ -0,0 +1,64 @@
import argparse
import os
import pickle
import random
import sys

import numpy as np
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing import image
from utils import load_extractor, get_dataset_path


def load_sample_dir(path, sample=10):
    x_ls = []
    image_paths = list(os.listdir(path))
    random.shuffle(image_paths)
    for i, file in enumerate(image_paths):
        if i > sample:
            break
        cur_path = os.path.join(path, file)
        im = image.load_img(cur_path, target_size=(224, 224))
        im = image.img_to_array(im)
        x_ls.append(im)
    raw_x = np.array(x_ls)
    return preprocess_input(raw_x)


def normalize(x):
    return x / np.linalg.norm(x)


def main():
    extractor = load_extractor(args.feature_extractor)
    path2emb = {}
    for target_dataset in args.candidate_datasets:
        target_dataset_path, _, _, _ = get_dataset_path(target_dataset)
        for target_class in os.listdir(target_dataset_path):
            target_class_path = os.path.join(target_dataset_path, target_class)
            target_X = load_sample_dir(target_class_path)
            cur_feature = extractor.predict(target_X)
            cur_feature = np.mean(cur_feature, axis=0)
            path2emb[target_class_path] = cur_feature

    for k, v in path2emb.items():
        path2emb[k] = normalize(v)

    pickle.dump(path2emb,
                open("../feature_extractors/embeddings/{}_emb_norm.p".format(args.feature_extractor), "wb"))


def parse_arguments(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=str,
                        help='GPU id', default='0')
    parser.add_argument('--candidate-datasets', nargs='+',
                        help='path candidate datasets')
    parser.add_argument('--feature-extractor', type=str,
                        help="name of the feature extractor used for optimization",
                        default="webface_dense_robust_extract")
    return parser.parse_args(argv)


if __name__ == '__main__':
    args = parse_arguments(sys.argv[1:])
    main()
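
As a rough illustration (not part of the commit), the sketch below shows how the per-class embedding pickle written by this script can be consumed to pick a target class far away from a protected user's features, mirroring the CloakData.select_target_label logic in the utils.py hunks further down. The pickle path is this script's default output name; original_feature_x is a hypothetical placeholder for the user's normalized features.

import pickle

import numpy as np
from sklearn.metrics import pairwise_distances

# Load the normalized per-class embeddings dumped by this script (default output name).
path2emb = pickle.load(
    open("../feature_extractors/embeddings/webface_dense_robust_extract_emb_norm.p", "rb"))

items = list(path2emb.items())
paths = [p[0] for p in items]
embs = np.array([p[1] for p in items])

# Placeholder for the protected user's features; in Fawkes these come from the
# same feature extractor and are normalized the same way.
original_feature_x = np.random.rand(10, embs.shape[1])
original_feature_x /= np.linalg.norm(original_feature_x, axis=1, keepdims=True)

pair_dist = pairwise_distances(original_feature_x, embs, 'l2')
max_sum = np.min(pair_dist, axis=0)        # closest protected image per candidate class
best_idx = np.argsort(max_sum)[::-1][0]    # candidate whose closest image is farthest away
print("candidate target class:", paths[best_idx])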

View File

@@ -9,13 +9,12 @@ from differentiator import FawkesMaskGeneration
 from tensorflow import set_random_seed
 from utils import load_extractor, CloakData, init_gpu

-#
 random.seed(12243)
 np.random.seed(122412)
 set_random_seed(12242)

-NUM_IMG_PROTECTED = 10  # Number of images used to optimize the target class
-BATCH_SIZE = 10
+NUM_IMG_PROTECTED = 32  # Number of images used to optimize the target class
+BATCH_SIZE = 32
 IMG_SHAPE = [224, 224, 3]
@@ -53,16 +52,13 @@ def perform_defense():
     num_protect = NUM_IMG_PROTECTED

     print("Loading {} for optimization".format(args.feature_extractor))
-    feature_extractors_ls = [load_extractor(name, layer_idx=args.layer_idx) for name in FEATURE_EXTRACTORS]
+    feature_extractors_ls = [load_extractor(name) for name in FEATURE_EXTRACTORS]

     protect_class = args.protect_class
     cloak_data = CloakData(args.dataset, target_selection_tries=1, protect_class=protect_class)

     model_name = args.feature_extractor.split("/")[-1].split('.')[0].replace("_extract", "")
-    RES_FILE_NAME = "{}_{}_protect{}_th{}_sd{}".format(args.dataset, model_name, cloak_data.protect_class, args.th,
-                                                       args.sd)
+    RES_FILE_NAME = "{}_{}_protect{}".format(args.dataset, model_name, cloak_data.protect_class)
     RES_FILE_NAME = os.path.join(RES_DIR, RES_FILE_NAME)
-    if os.path.exists(RES_FILE_NAME):
-        exit(1)
     print("Protect Class: ", cloak_data.protect_class)

     cloak_data.target_path, cloak_data.target_data = cloak_data.select_target_label(feature_extractors_ls,
@@ -88,13 +84,9 @@ def parse_arguments(argv):
                         help='name of dataset', default='scrub')
     parser.add_argument('--feature-extractor', type=str,
                         help="name of the feature extractor used for optimization",
-                        default="../feature_extractors/webface_dense_robust_extract.h5")
-    parser.add_argument('--layer-idx', type=int,
-                        help="the idx of the layer of neuron that are used as feature space",
-                        default=-3)
-    parser.add_argument('--th', type=float, default=0.01)
-    parser.add_argument('--sd', type=int, default=1e4)
+                        default="webface_dense_robust")
+    parser.add_argument('--th', type=float, default=0.007)
+    parser.add_argument('--sd', type=int, default=1e5)
     parser.add_argument('--protect_class', type=str, default=None)
     parser.add_argument('--lr', type=float, default=0.1)
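
To make the "standardize feature extractors" change concrete, here is a minimal sketch (assuming standalone Keras and extractor .h5 files stored under ../feature_extractors/) of the loading convention this script now relies on: extractors are referenced by short name, resolved under ../feature_extractors/, and must already be headless feature extractors, mirroring the load_extractor() rewrite in the utils.py hunk below.

import keras


def load_extractor_sketch(name):
    # Resolve a short extractor name to a bundled .h5 file.
    model = keras.models.load_model("../feature_extractors/{}.h5".format(name))
    last = model.layers[-1]
    # Same guard the commit adds in utils.load_extractor: reject models that
    # still end in a softmax classification layer.
    if hasattr(last, "activation") and last.activation == "softmax":
        raise Exception("Extractor still ends in softmax; strip the top layers first.")
    return model


# e.g. the default name used by the new embedding script above:
# extractor = load_extractor_sketch("webface_dense_robust_extract")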

View File

@@ -68,7 +68,6 @@ def init_gpu(gpu_index, force=False):

 def preprocess(X, method):
-    # assume color last
     assert method in {'raw', 'imagenet', 'inception', 'mnist'}
     if method is 'raw':
@@ -82,7 +81,6 @@ def preprocess(X, method):

 def reverse_preprocess(X, method):
-    # assume color last
     assert method in {'raw', 'imagenet', 'inception', 'mnist'}
     if method is 'raw':
@@ -146,13 +144,6 @@ def imagenet_preprocessing(x, data_format=None):

 def imagenet_reverse_preprocessing(x, data_format=None):
     import keras.backend as K
-    """ Reverse preprocesses a tensor encoding a batch of images.
-    # Arguments
-        x: input Numpy tensor, 4D.
-        data_format: data format of the image tensor.
-    # Returns
-        Preprocessed tensor.
-    """
     x = np.array(x)
     if data_format is None:
         data_format = K.image_data_format()
@@ -189,37 +180,32 @@ def build_bottleneck_model(model, cut_off):
     return bottleneck_model


-def load_extractor(name, layer_idx=None):
-    model = keras.models.load_model(name)
-
-    if "extract" in name.split("/")[-1]:
-        model = keras.models.load_model(name)
-    else:
-        print("Convert a model to a feature extractor")
-        model = build_bottleneck_model(model, model.layers[layer_idx].name)
-        model.save(name + "extract")
-        model = keras.models.load_model(name + "extract")
+def load_extractor(name):
+    model = keras.models.load_model("../feature_extractors/{}.h5".format(name))
+    if hasattr(model.layers[-1], "activation") and model.layers[-1].activation == "softmax":
+        raise Exception(
+            "Given extractor's last layer is softmax, need to remove the top layers to make it into a feature extractor")
+    # if "extract" in name.split("/")[-1]:
+    #     pass
+    # else:
+    #     print("Convert a model to a feature extractor")
+    #     model = build_bottleneck_model(model, model.layers[layer_idx].name)
+    #     model.save(name + "extract")
+    #     model = keras.models.load_model(name + "extract")
     return model


 def get_dataset_path(dataset):
-    if dataset == "scrub":
-        train_data_dir = '../data/scrub/train'
-        test_data_dir = '../data/scrub/test'
-        number_classes = 530
-        number_samples = 57838
-    elif dataset == "pubfig":
-        train_data_dir = '../data/pubfig/train'
-        test_data_dir = '../data/pubfig/test'
-        number_classes = 65
-        number_samples = 5979
-    else:
+    if not os.path.exists("config.json"):
+        raise Exception("Please config the datasets before running protection code. See more in README and config.py.")
+
+    config = json.load(open("config.json", 'r'))
+    if dataset not in config:
         raise Exception(
             "Dataset {} does not exist, please download to data/ and add the path to this function... Abort".format(
                 dataset))
-    return train_data_dir, test_data_dir, number_classes, number_samples
+    return config[dataset]['train_dir'], config[dataset]['test_dir'], config[dataset]['num_classes'], config[dataset][
+        'num_images']


 def normalize(x):
@@ -227,10 +213,9 @@ def normalize(x):

 class CloakData(object):
-    def __init__(self, dataset, img_shape=(224, 224), target_selection_tries=30, protect_class=None):
+    def __init__(self, dataset, img_shape=(224, 224), protect_class=None):
         self.dataset = dataset
         self.img_shape = img_shape
-        self.target_selection_tries = target_selection_tries
         self.train_data_dir, self.test_data_dir, self.number_classes, self.number_samples = get_dataset_path(dataset)
         self.all_labels = sorted(list(os.listdir(self.train_data_dir)))
@@ -269,7 +254,6 @@ class CloakData(object):
     def load_embeddings(self, feature_extractors_names):
         dictionaries = []
         for extractor_name in feature_extractors_names:
-            extractor_name = extractor_name.split("/")[-1].split('.')[0].replace("_extract", "")
             path2emb = pickle.load(open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb"))
             dictionaries.append(path2emb)
@@ -288,14 +272,14 @@ class CloakData(object):
         embs = [p[1] for p in items]
         embs = np.array(embs)
-        pair_dist = pairwise_distances(original_feature_x, embs, 'l2')
+        pair_dist = pairwise_distances(original_feature_x, embs, metric)
         max_sum = np.min(pair_dist, axis=0)
         sorted_idx = np.argsort(max_sum)[::-1]

         highest_num = 0
         paired_target_X = None
         final_target_class_path = None
-        for idx in sorted_idx[:2]:
+        for idx in sorted_idx[:5]:
             target_class_path = paths[idx]
             cur_target_X = self.load_dir(target_class_path)
             cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X])