Mirror of https://github.com/Shawn-Shan/fawkes.git (synced 2024-12-22 07:09:33 +05:30)
standardize feature extractors
This commit is contained in:
parent de558e841e
commit 83bb798373
fawkes/config.py (new file, 42 additions)
@@ -0,0 +1,42 @@
+import glob
+import json
+import os
+
+DATASETS = {
+    "pubfig": "../data/pubfig/",
+    "scrub": "../data/scrub/",
+    "vggface1": "/mnt/data/sixiongshan/data/vggface/",
+    # "vggface2": "/mnt/data/sixiongshan/data/vggface2/",
+    "webface": "/mnt/data/sixiongshan/data/webface/",
+    # "youtubeface": "/mnt/data/sixiongshan/data/youtubeface/keras_flow_data/",
+}
+
+
+
+def main():
+    config = {}
+    for dataset in DATASETS.keys():
+        path = DATASETS[dataset]
+        if not os.path.exists(path):
+            print("Dataset path for {} does not exist, skipped".format(dataset))
+            continue
+        train_dir = os.path.join(path, "train")
+        test_dir = os.path.join(path, "test")
+        if not os.path.exists(train_dir):
+            print("Training dataset path for {} does not exist, skipped".format(dataset))
+            continue
+        num_classes = len(os.listdir(train_dir))
+        num_images = len(glob.glob(os.path.join(train_dir, "*/*")))
+        if num_images == 0 or num_classes == 0 or num_images == num_classes:
+            raise Exception("Dataset {} is not setup as detailed in README.".format(dataset))
+
+        config[dataset] = {"train_dir": train_dir, "test_dir": test_dir, "num_classes": num_classes,
+                           "num_images": num_images}
+        print("Successfully config {}".format(dataset))
+    j = json.dumps(config)
+    with open("config.json", "wb") as f:
+        f.write(j.encode())
+
+
+if __name__ == '__main__':
+    main()
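For orientation, here is a minimal sketch (not part of the commit) of how the config.json written by fawkes/config.py above could be read back. The file name and the train_dir/test_dir/num_classes/num_images keys come from this diff; the helper name load_dataset_config is illustrative only.

# Illustrative reader for the config.json produced by fawkes/config.py above.
# The keys mirror those written in this commit; this helper itself is hypothetical.
import json
import os


def load_dataset_config(dataset, config_path="config.json"):
    if not os.path.exists(config_path):
        raise Exception("Run fawkes/config.py first to generate {}".format(config_path))
    with open(config_path, "r") as f:
        config = json.load(f)
    if dataset not in config:
        raise Exception("Dataset {} is not configured".format(dataset))
    entry = config[dataset]
    return entry["train_dir"], entry["test_dir"], entry["num_classes"], entry["num_images"]


# Example: train_dir, test_dir, n_classes, n_images = load_dataset_config("scrub")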
@@ -7,6 +7,7 @@
 import datetime
 import time
 from decimal import Decimal
+
 import numpy as np
 import tensorflow as tf
 from utils import preprocess, reverse_preprocess
@@ -102,9 +102,6 @@ def main():
     else:
         raise ValueError

-    CLOAK_DIR = CLOAK_DIR + "_th{}_sd{}".format(args.th, int(args.sd))
-    print(CLOAK_DIR)
-
     CLOAK_DIR = os.path.join("../results", CLOAK_DIR)
     RES = pickle.load(open(os.path.join(CLOAK_DIR, "cloak_data.p"), 'rb'))

@@ -127,7 +124,7 @@ def main():

     try:
         model.fit_generator(train_generator, steps_per_epoch=cloak_data.number_samples // 32,
-                            validation_data=(original_X, original_Y), epochs=args.n_epochs, verbose=1,
+                            validation_data=(original_X, original_Y), epochs=args.n_epochs, verbose=2,
                             use_multiprocessing=False, workers=1)
     except KeyboardInterrupt:
         pass
@@ -144,7 +141,8 @@ def main():
     print("Accuracy on other classes {:.4f}".format(other_acc))
     EVAL_RES['other_acc'] = other_acc
     dump_dictionary_as_json(EVAL_RES,
-                            os.path.join(CLOAK_DIR, "eval_seed{}_th{}.json".format(args.seed_idx, args.th)))
+                            os.path.join(CLOAK_DIR,
+                                         "eval_seed{}_th{}_sd{}.json".format(args.seed_idx, args.th, args.sd)))


 def parse_arguments(argv):
@@ -158,7 +156,7 @@ def parse_arguments(argv):
                         help='name of dataset', default='scrub')
     parser.add_argument('--cloak_data', type=str,
                         help='name of the cloak result directory',
-                        default='scrub_webface_dense_robust_protectPatrick_Dempsey')
+                        default='scrub_webface_dense_robust_protectKristen_Alderson')

     parser.add_argument('--sd', type=int, default=1e6)
     parser.add_argument('--th', type=float, default=0.01)
@@ -167,7 +165,7 @@ def parse_arguments(argv):
                         help='student model', default='../feature_extractors/vggface2_inception_extract.h5')
     parser.add_argument('--batch_size', type=int, default=32)
     parser.add_argument('--validation_split', type=float, default=0.1)
-    parser.add_argument('--n_epochs', type=int, default=3)
+    parser.add_argument('--n_epochs', type=int, default=5)
     return parser.parse_args(argv)

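The practical effect of the evaluation hunks above is that the result file name now also encodes the sd parameter. A toy illustration (th and sd use the defaults visible in this diff; the seed index is an example value):

# Illustrative only: how the new eval result file name is composed after this change.
seed_idx, th, sd = 0, 0.01, int(1e6)  # example values; th/sd match the argparse defaults above
print("eval_seed{}_th{}_sd{}.json".format(seed_idx, th, sd))
# prints: eval_seed0_th0.01_sd1000000.json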
fawkes/prepare_feature_extractor.py (new file, 64 additions)
@@ -0,0 +1,64 @@
+import argparse
+import os
+import pickle
+import random
+import sys
+
+import numpy as np
+from keras.applications.vgg16 import preprocess_input
+from keras.preprocessing import image
+from utils import load_extractor, get_dataset_path
+
+
+def load_sample_dir(path, sample=10):
+    x_ls = []
+    image_paths = list(os.listdir(path))
+    random.shuffle(image_paths)
+    for i, file in enumerate(image_paths):
+        if i > sample:
+            break
+        cur_path = os.path.join(path, file)
+        im = image.load_img(cur_path, target_size=(224, 224))
+        im = image.img_to_array(im)
+        x_ls.append(im)
+    raw_x = np.array(x_ls)
+    return preprocess_input(raw_x)
+
+
+def normalize(x):
+    return x / np.linalg.norm(x)
+
+
+def main():
+    extractor = load_extractor(args.feature_extractor)
+    path2emb = {}
+    for target_dataset in args.candidate_datasets:
+        target_dataset_path, _, _, _ = get_dataset_path(target_dataset)
+        for target_class in os.listdir(target_dataset_path):
+            target_class_path = os.path.join(target_dataset_path, target_class)
+            target_X = load_sample_dir(target_class_path)
+            cur_feature = extractor.predict(target_X)
+            cur_feature = np.mean(cur_feature, axis=0)
+            path2emb[target_class_path] = cur_feature
+
+    for k, v in path2emb.items():
+        path2emb[k] = normalize(v)
+
+    pickle.dump(path2emb, open("../feature_extractors/embeddings/{}_emb_norm.p".format(args.feature_extractor), "wb"))
+
+
+def parse_arguments(argv):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--gpu', type=str,
+                        help='GPU id', default='0')
+    parser.add_argument('--candidate-datasets', nargs='+',
+                        help='path candidate datasets')
+    parser.add_argument('--feature-extractor', type=str,
+                        help="name of the feature extractor used for optimization",
+                        default="webface_dense_robust_extract")
+    return parser.parse_args(argv)
+
+
+if __name__ == '__main__':
+    args = parse_arguments(sys.argv[1:])
+    main()
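As a quick sanity check (not part of the commit), the embedding pickle written by prepare_feature_extractor.py above can be inspected as below. The extractor name matches the script's default argument; everything else is illustrative.

# Illustrative check of the embeddings dumped by prepare_feature_extractor.py above.
import pickle

import numpy as np

extractor_name = "webface_dense_robust_extract"  # the script's default --feature-extractor value
with open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb") as f:
    path2emb = pickle.load(f)

for class_path, emb in list(path2emb.items())[:5]:
    # Each value is the mean feature vector of the sampled class images, normalized to unit L2 norm.
    print(class_path, emb.shape, round(float(np.linalg.norm(emb)), 4))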
@@ -9,13 +9,12 @@ from differentiator import FawkesMaskGeneration
 from tensorflow import set_random_seed
 from utils import load_extractor, CloakData, init_gpu

-#
 random.seed(12243)
 np.random.seed(122412)
 set_random_seed(12242)

-NUM_IMG_PROTECTED = 10  # Number of images used to optimize the target class
-BATCH_SIZE = 10
+NUM_IMG_PROTECTED = 32  # Number of images used to optimize the target class
+BATCH_SIZE = 32

 IMG_SHAPE = [224, 224, 3]

@@ -53,16 +52,13 @@ def perform_defense():
     num_protect = NUM_IMG_PROTECTED

     print("Loading {} for optimization".format(args.feature_extractor))
-    feature_extractors_ls = [load_extractor(name, layer_idx=args.layer_idx) for name in FEATURE_EXTRACTORS]
+    feature_extractors_ls = [load_extractor(name) for name in FEATURE_EXTRACTORS]
     protect_class = args.protect_class

     cloak_data = CloakData(args.dataset, target_selection_tries=1, protect_class=protect_class)
     model_name = args.feature_extractor.split("/")[-1].split('.')[0].replace("_extract", "")
-    RES_FILE_NAME = "{}_{}_protect{}_th{}_sd{}".format(args.dataset, model_name, cloak_data.protect_class, args.th,
-                                                       args.sd)
+    RES_FILE_NAME = "{}_{}_protect{}".format(args.dataset, model_name, cloak_data.protect_class)
     RES_FILE_NAME = os.path.join(RES_DIR, RES_FILE_NAME)
-    if os.path.exists(RES_FILE_NAME):
-        exit(1)
     print("Protect Class: ", cloak_data.protect_class)

     cloak_data.target_path, cloak_data.target_data = cloak_data.select_target_label(feature_extractors_ls,
@@ -88,13 +84,9 @@ def parse_arguments(argv):
                         help='name of dataset', default='scrub')
     parser.add_argument('--feature-extractor', type=str,
                         help="name of the feature extractor used for optimization",
-                        default="../feature_extractors/webface_dense_robust_extract.h5")
-    parser.add_argument('--layer-idx', type=int,
-                        help="the idx of the layer of neuron that are used as feature space",
-                        default=-3)
-
-    parser.add_argument('--th', type=float, default=0.01)
-    parser.add_argument('--sd', type=int, default=1e4)
+                        default="webface_dense_robust")
+    parser.add_argument('--th', type=float, default=0.007)
+    parser.add_argument('--sd', type=int, default=1e5)
     parser.add_argument('--protect_class', type=str, default=None)
     parser.add_argument('--lr', type=float, default=0.1)

@@ -68,7 +68,6 @@ def init_gpu(gpu_index, force=False):


 def preprocess(X, method):
-    # assume color last
     assert method in {'raw', 'imagenet', 'inception', 'mnist'}

     if method is 'raw':
@@ -82,7 +81,6 @@ def preprocess(X, method):


 def reverse_preprocess(X, method):
-    # assume color last
     assert method in {'raw', 'imagenet', 'inception', 'mnist'}

     if method is 'raw':
@@ -146,13 +144,6 @@ def imagenet_preprocessing(x, data_format=None):

 def imagenet_reverse_preprocessing(x, data_format=None):
     import keras.backend as K
-    """ Reverse preprocesses a tensor encoding a batch of images.
-    # Arguments
-        x: input Numpy tensor, 4D.
-        data_format: data format of the image tensor.
-    # Returns
-        Preprocessed tensor.
-    """
     x = np.array(x)
     if data_format is None:
         data_format = K.image_data_format()
@@ -189,37 +180,32 @@ def build_bottleneck_model(model, cut_off):
     return bottleneck_model


-def load_extractor(name, layer_idx=None):
-    model = keras.models.load_model(name)
-    if "extract" in name.split("/")[-1]:
-        model = keras.models.load_model(name)
-    else:
-        print("Convert a model to a feature extractor")
-        model = build_bottleneck_model(model, model.layers[layer_idx].name)
-        model.save(name + "extract")
-        model = keras.models.load_model(name + "extract")
+def load_extractor(name):
+    model = keras.models.load_model("../feature_extractors/{}.h5".format(name))
+    if hasattr(model.layers[-1], "activation") and model.layers[-1].activation == "softmax":
+        raise Exception(
+            "Given extractor's last layer is softmax, need to remove the top layers to make it into a feature extractor")
+    # if "extract" in name.split("/")[-1]:
+    #     pass
+    # else:
+    #     print("Convert a model to a feature extractor")
+    #     model = build_bottleneck_model(model, model.layers[layer_idx].name)
+    #     model.save(name + "extract")
+    #     model = keras.models.load_model(name + "extract")
     return model


 def get_dataset_path(dataset):
-    if dataset == "scrub":
-        train_data_dir = '../data/scrub/train'
-        test_data_dir = '../data/scrub/test'
-        number_classes = 530
-        number_samples = 57838
-    elif dataset == "pubfig":
-        train_data_dir = '../data/pubfig/train'
-        test_data_dir = '../data/pubfig/test'
-        number_classes = 65
-        number_samples = 5979
-    else:
+    if not os.path.exists("config.json"):
+        raise Exception("Please config the datasets before running protection code. See more in README and config.py.")
+
+    config = json.load(open("config.json", 'r'))
+    if dataset not in config:
         raise Exception(
             "Dataset {} does not exist, please download to data/ and add the path to this function... Abort".format(
                 dataset))
-    return train_data_dir, test_data_dir, number_classes, number_samples
+    return config[dataset]['train_dir'], config[dataset]['test_dir'], config[dataset]['num_classes'], config[dataset][
+        'num_images']


 def normalize(x):
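Since load_extractor now refuses models whose last layer is a softmax (instead of converting them on the fly as the removed branch did), a classifier would have to be converted once, up front. A sketch under that assumption, reusing build_bottleneck_model from the utils module shown above; the model path and the cut-off layer index are placeholders:

# Hypothetical one-off conversion, mirroring the commented-out branch in load_extractor above.
import keras

from utils import build_bottleneck_model  # defined in the same utils module as load_extractor

classifier = keras.models.load_model("../feature_extractors/some_classifier.h5")  # placeholder path
# Cut the network at an intermediate layer (index is a placeholder) to expose features.
extractor = build_bottleneck_model(classifier, classifier.layers[-3].name)
extractor.save("../feature_extractors/some_classifier_extract.h5")
# load_extractor("some_classifier_extract") would then resolve
# ../feature_extractors/some_classifier_extract.h5 under the new naming convention.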
@@ -227,10 +213,9 @@ def normalize(x):


 class CloakData(object):
-    def __init__(self, dataset, img_shape=(224, 224), target_selection_tries=30, protect_class=None):
+    def __init__(self, dataset, img_shape=(224, 224), protect_class=None):
         self.dataset = dataset
         self.img_shape = img_shape
-        self.target_selection_tries = target_selection_tries

         self.train_data_dir, self.test_data_dir, self.number_classes, self.number_samples = get_dataset_path(dataset)
         self.all_labels = sorted(list(os.listdir(self.train_data_dir)))
@@ -269,7 +254,6 @@ class CloakData(object):
     def load_embeddings(self, feature_extractors_names):
         dictionaries = []
         for extractor_name in feature_extractors_names:
-            extractor_name = extractor_name.split("/")[-1].split('.')[0].replace("_extract", "")
             path2emb = pickle.load(open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb"))
             dictionaries.append(path2emb)

@@ -288,14 +272,14 @@ class CloakData(object):
         embs = [p[1] for p in items]
         embs = np.array(embs)

-        pair_dist = pairwise_distances(original_feature_x, embs, 'l2')
+        pair_dist = pairwise_distances(original_feature_x, embs, metric)
         max_sum = np.min(pair_dist, axis=0)
         sorted_idx = np.argsort(max_sum)[::-1]

         highest_num = 0
         paired_target_X = None
         final_target_class_path = None
-        for idx in sorted_idx[:2]:
+        for idx in sorted_idx[:5]:
             target_class_path = paths[idx]
             cur_target_X = self.load_dir(target_class_path)
             cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X])
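The last hunk parameterizes the distance used for target-class selection ('l2' becomes a metric variable) and widens the candidate pool from the top 2 to the top 5 classes. A self-contained toy illustration of that selection step, using random arrays rather than the repo's data:

# Toy illustration of the target-selection distance computation changed above; data is random.
import numpy as np
from sklearn.metrics import pairwise_distances

original_feature_x = np.random.rand(8, 128)   # stand-in for features of the protected user's images
embs = np.random.rand(50, 128)                # stand-in for candidate target-class embeddings
metric = "l2"                                 # now passed in rather than hardcoded

pair_dist = pairwise_distances(original_feature_x, embs, metric)
max_sum = np.min(pair_dist, axis=0)           # distance from each candidate to its nearest protected image
sorted_idx = np.argsort(max_sum)[::-1]        # farthest candidates first
print(sorted_idx[:5])                         # the new code considers the top 5 instead of the top 2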