mirror of
https://github.com/Shawn-Shan/fawkes.git
synced 2024-11-09 13:41:31 +05:30
602 lines
19 KiB
Python
602 lines
19 KiB
Python
import glob
|
|
import gzip
|
|
import json
|
|
import os
|
|
import pickle
|
|
import random
|
|
import shutil
|
|
import sys
|
|
import tarfile
|
|
import zipfile
|
|
|
|
import PIL
|
|
import six
|
|
from six.moves.urllib.error import HTTPError, URLError
|
|
|
|
stderr = sys.stderr
|
|
sys.stderr = open(os.devnull, 'w')
|
|
import keras
|
|
|
|
sys.stderr = stderr
|
|
import keras.backend as K
|
|
import numpy as np
|
|
import tensorflow as tf
|
|
from PIL import Image, ExifTags
|
|
from keras.layers import Dense, Activation, Dropout
|
|
from keras.models import Model
|
|
from keras.preprocessing import image
|
|
from skimage.transform import resize
|
|
|
|
from fawkes.align_face import align
|
|
from six.moves.urllib.request import urlopen
|
|
|
|
if sys.version_info[0] == 2:
|
|
def urlretrieve(url, filename, reporthook=None, data=None):
|
|
def chunk_read(response, chunk_size=8192, reporthook=None):
|
|
content_type = response.info().get('Content-Length')
|
|
total_size = -1
|
|
if content_type is not None:
|
|
total_size = int(content_type.strip())
|
|
count = 0
|
|
while True:
|
|
chunk = response.read(chunk_size)
|
|
count += 1
|
|
if reporthook is not None:
|
|
reporthook(count, chunk_size, total_size)
|
|
if chunk:
|
|
yield chunk
|
|
else:
|
|
break
|
|
|
|
response = urlopen(url, data)
|
|
with open(filename, 'wb') as fd:
|
|
for chunk in chunk_read(response, reporthook=reporthook):
|
|
fd.write(chunk)
|
|
else:
|
|
from six.moves.urllib.request import urlretrieve
|
|
|
|
def clip_img(X, preprocessing='raw'):
|
|
X = reverse_preprocess(X, preprocessing)
|
|
X = np.clip(X, 0.0, 255.0)
|
|
X = preprocess(X, preprocessing)
|
|
return X
|
|
|
|
|
|
def load_image(path):
|
|
try:
|
|
img = Image.open(path)
|
|
except PIL.UnidentifiedImageError:
|
|
return None
|
|
|
|
if img._getexif() is not None:
|
|
for orientation in ExifTags.TAGS.keys():
|
|
if ExifTags.TAGS[orientation] == 'Orientation':
|
|
break
|
|
|
|
exif = dict(img._getexif().items())
|
|
if orientation in exif.keys():
|
|
if exif[orientation] == 3:
|
|
img = img.rotate(180, expand=True)
|
|
elif exif[orientation] == 6:
|
|
img = img.rotate(270, expand=True)
|
|
elif exif[orientation] == 8:
|
|
img = img.rotate(90, expand=True)
|
|
else:
|
|
pass
|
|
img = img.convert('RGB')
|
|
image_array = image.img_to_array(img)
|
|
|
|
return image_array
|
|
|
|
|
|
class Faces(object):
|
|
def __init__(self, image_paths, aligner, verbose=1, eval_local=False):
|
|
|
|
self.verbose = verbose
|
|
self.aligner = aligner
|
|
self.org_faces = []
|
|
self.cropped_faces = []
|
|
self.cropped_faces_shape = []
|
|
self.cropped_index = []
|
|
self.callback_idx = []
|
|
if verbose:
|
|
print("Identify {} images".format(len(image_paths)))
|
|
for i, p in enumerate(image_paths):
|
|
cur_img = load_image(p)
|
|
if cur_img is None:
|
|
continue
|
|
|
|
self.org_faces.append(cur_img)
|
|
|
|
if eval_local:
|
|
margin = 0
|
|
else:
|
|
margin = 0.7
|
|
align_img = align(cur_img, self.aligner, margin=margin)
|
|
|
|
if align_img is None:
|
|
print("Find 0 face(s) in {}".format(p.split("/")[-1]))
|
|
continue
|
|
|
|
cur_faces = align_img[0]
|
|
|
|
cur_shapes = [f.shape[:-1] for f in cur_faces]
|
|
|
|
cur_faces_square = []
|
|
if verbose:
|
|
print("Find {} face(s) in {}".format(len(cur_faces), p.split("/")[-1]))
|
|
|
|
for img in cur_faces:
|
|
if eval_local:
|
|
base = resize(img, (224, 224))
|
|
else:
|
|
long_size = max([img.shape[1], img.shape[0]])
|
|
base = np.zeros((long_size, long_size, 3))
|
|
base[0:img.shape[0], 0:img.shape[1], :] = img
|
|
cur_faces_square.append(base)
|
|
|
|
cur_index = align_img[1]
|
|
cur_faces_square = [resize(f, (224, 224)) for f in cur_faces_square]
|
|
self.cropped_faces_shape.extend(cur_shapes)
|
|
self.cropped_faces.extend(cur_faces_square)
|
|
self.cropped_index.extend(cur_index)
|
|
self.callback_idx.extend([i] * len(cur_faces_square))
|
|
|
|
if not self.cropped_faces:
|
|
print("No faces detected")
|
|
exit(1)
|
|
|
|
self.cropped_faces = np.array(self.cropped_faces)
|
|
|
|
self.cropped_faces = preprocess(self.cropped_faces, 'imagenet')
|
|
|
|
self.cloaked_cropped_faces = None
|
|
self.cloaked_faces = np.copy(self.org_faces)
|
|
|
|
def get_faces(self):
|
|
return self.cropped_faces
|
|
|
|
def merge_faces(self, cloaks):
|
|
|
|
self.cloaked_faces = np.copy(self.org_faces)
|
|
|
|
for i in range(len(self.cropped_faces)):
|
|
cur_cloak = cloaks[i]
|
|
org_shape = self.cropped_faces_shape[i]
|
|
old_square_shape = max([org_shape[0], org_shape[1]])
|
|
reshape_cloak = resize(cur_cloak, (old_square_shape, old_square_shape))
|
|
reshape_cloak = reshape_cloak[0:org_shape[0], 0:org_shape[1], :]
|
|
|
|
callback_id = self.callback_idx[i]
|
|
bb = self.cropped_index[i]
|
|
self.cloaked_faces[callback_id][bb[1]:bb[3], bb[0]:bb[2], :] += reshape_cloak
|
|
|
|
return self.cloaked_faces
|
|
|
|
|
|
def dump_dictionary_as_json(dict, outfile):
|
|
j = json.dumps(dict)
|
|
with open(outfile, "wb") as f:
|
|
f.write(j.encode())
|
|
|
|
|
|
def load_victim_model(number_classes, teacher_model=None, end2end=False, dropout=0):
|
|
for l in teacher_model.layers:
|
|
l.trainable = end2end
|
|
x = teacher_model.layers[-1].output
|
|
if dropout > 0:
|
|
x = Dropout(dropout)(x)
|
|
x = Dense(number_classes)(x)
|
|
x = Activation('softmax', name="act")(x)
|
|
model = Model(teacher_model.input, x)
|
|
opt = keras.optimizers.Adadelta()
|
|
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
|
|
return model
|
|
|
|
|
|
def init_gpu(gpu_index, force=False):
|
|
if isinstance(gpu_index, list):
|
|
gpu_num = ','.join([str(i) for i in gpu_index])
|
|
else:
|
|
gpu_num = str(gpu_index)
|
|
if "CUDA_VISIBLE_DEVICES" in os.environ and os.environ["CUDA_VISIBLE_DEVICES"] and not force:
|
|
print('GPU already initiated')
|
|
return
|
|
os.environ["CUDA_VISIBLE_DEVICES"] = gpu_num
|
|
sess = fix_gpu_memory()
|
|
return sess
|
|
|
|
|
|
def fix_gpu_memory(mem_fraction=1):
|
|
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
|
tf_config = None
|
|
if tf.test.is_gpu_available():
|
|
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_fraction)
|
|
tf_config = tf.ConfigProto(gpu_options=gpu_options)
|
|
tf_config.gpu_options.allow_growth = True
|
|
tf_config.log_device_placement = False
|
|
init_op = tf.global_variables_initializer()
|
|
sess = tf.Session(config=tf_config)
|
|
sess.run(init_op)
|
|
K.set_session(sess)
|
|
return sess
|
|
|
|
|
|
def preprocess(X, method):
|
|
assert method in {'raw', 'imagenet', 'inception', 'mnist'}
|
|
|
|
if method is 'raw':
|
|
pass
|
|
elif method is 'imagenet':
|
|
X = imagenet_preprocessing(X)
|
|
else:
|
|
raise Exception('unknown method %s' % method)
|
|
|
|
return X
|
|
|
|
|
|
def reverse_preprocess(X, method):
|
|
assert method in {'raw', 'imagenet', 'inception', 'mnist'}
|
|
|
|
if method is 'raw':
|
|
pass
|
|
elif method is 'imagenet':
|
|
X = imagenet_reverse_preprocessing(X)
|
|
else:
|
|
raise Exception('unknown method %s' % method)
|
|
|
|
return X
|
|
|
|
|
|
def imagenet_preprocessing(x, data_format=None):
|
|
if data_format is None:
|
|
data_format = K.image_data_format()
|
|
assert data_format in ('channels_last', 'channels_first')
|
|
|
|
x = np.array(x)
|
|
if data_format == 'channels_first':
|
|
# 'RGB'->'BGR'
|
|
if x.ndim == 3:
|
|
x = x[::-1, ...]
|
|
else:
|
|
x = x[:, ::-1, ...]
|
|
else:
|
|
# 'RGB'->'BGR'
|
|
x = x[..., ::-1]
|
|
|
|
mean = [103.939, 116.779, 123.68]
|
|
std = None
|
|
|
|
# Zero-center by mean pixel
|
|
if data_format == 'channels_first':
|
|
if x.ndim == 3:
|
|
x[0, :, :] -= mean[0]
|
|
x[1, :, :] -= mean[1]
|
|
x[2, :, :] -= mean[2]
|
|
if std is not None:
|
|
x[0, :, :] /= std[0]
|
|
x[1, :, :] /= std[1]
|
|
x[2, :, :] /= std[2]
|
|
else:
|
|
x[:, 0, :, :] -= mean[0]
|
|
x[:, 1, :, :] -= mean[1]
|
|
x[:, 2, :, :] -= mean[2]
|
|
if std is not None:
|
|
x[:, 0, :, :] /= std[0]
|
|
x[:, 1, :, :] /= std[1]
|
|
x[:, 2, :, :] /= std[2]
|
|
else:
|
|
x[..., 0] -= mean[0]
|
|
x[..., 1] -= mean[1]
|
|
x[..., 2] -= mean[2]
|
|
if std is not None:
|
|
x[..., 0] /= std[0]
|
|
x[..., 1] /= std[1]
|
|
x[..., 2] /= std[2]
|
|
|
|
return x
|
|
|
|
|
|
def imagenet_reverse_preprocessing(x, data_format=None):
|
|
import keras.backend as K
|
|
x = np.array(x)
|
|
if data_format is None:
|
|
data_format = K.image_data_format()
|
|
assert data_format in ('channels_last', 'channels_first')
|
|
|
|
if data_format == 'channels_first':
|
|
if x.ndim == 3:
|
|
# Zero-center by mean pixel
|
|
x[0, :, :] += 103.939
|
|
x[1, :, :] += 116.779
|
|
x[2, :, :] += 123.68
|
|
# 'BGR'->'RGB'
|
|
x = x[::-1, :, :]
|
|
else:
|
|
x[:, 0, :, :] += 103.939
|
|
x[:, 1, :, :] += 116.779
|
|
x[:, 2, :, :] += 123.68
|
|
x = x[:, ::-1, :, :]
|
|
else:
|
|
# Zero-center by mean pixel
|
|
x[..., 0] += 103.939
|
|
x[..., 1] += 116.779
|
|
x[..., 2] += 123.68
|
|
# 'BGR'->'RGB'
|
|
x = x[..., ::-1]
|
|
return x
|
|
|
|
|
|
def reverse_process_cloaked(x, preprocess='imagenet'):
|
|
# x = clip_img(x, preprocess)
|
|
return reverse_preprocess(x, preprocess)
|
|
|
|
|
|
def build_bottleneck_model(model, cut_off):
|
|
bottleneck_model = Model(model.input, model.get_layer(cut_off).output)
|
|
bottleneck_model.compile(loss='categorical_crossentropy',
|
|
optimizer='adam',
|
|
metrics=['accuracy'])
|
|
return bottleneck_model
|
|
|
|
|
|
def load_extractor(name):
|
|
model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
|
|
os.makedirs(model_dir, exist_ok=True)
|
|
model_file = os.path.join(model_dir, "{}.h5".format(name))
|
|
emb_file = os.path.join(model_dir, "{}_emb.p.gz".format(name))
|
|
if os.path.exists(model_file):
|
|
model = keras.models.load_model(model_file)
|
|
else:
|
|
print("Download models...")
|
|
get_file("{}.h5".format(name), "http://sandlab.cs.uchicago.edu/fawkes/files/{}.h5".format(name),
|
|
cache_dir=model_dir, cache_subdir='')
|
|
model = keras.models.load_model(model_file)
|
|
|
|
if not os.path.exists(emb_file):
|
|
get_file("{}_emb.p.gz".format(name), "http://sandlab.cs.uchicago.edu/fawkes/files/{}_emb.p.gz".format(name),
|
|
cache_dir=model_dir, cache_subdir='')
|
|
|
|
if hasattr(model.layers[-1], "activation") and model.layers[-1].activation == "softmax":
|
|
raise Exception(
|
|
"Given extractor's last layer is softmax, need to remove the top layers to make it into a feature extractor")
|
|
return model
|
|
|
|
|
|
def get_dataset_path(dataset):
|
|
model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
|
|
if not os.path.exists(os.path.join(model_dir, "config.json")):
|
|
raise Exception("Please config the datasets before running protection code. See more in README and config.py.")
|
|
|
|
config = json.load(open(os.path.join(model_dir, "config.json"), 'r'))
|
|
if dataset not in config:
|
|
raise Exception(
|
|
"Dataset {} does not exist, please download to data/ and add the path to this function... Abort".format(
|
|
dataset))
|
|
return config[dataset]['train_dir'], config[dataset]['test_dir'], config[dataset]['num_classes'], config[dataset][
|
|
'num_images']
|
|
|
|
|
|
def normalize(x):
|
|
return x / np.linalg.norm(x, axis=1, keepdims=True)
|
|
|
|
|
|
def dump_image(x, filename, format="png", scale=False):
|
|
# img = image.array_to_img(x, scale=scale)
|
|
img = image.array_to_img(x)
|
|
img.save(filename, format)
|
|
return
|
|
|
|
|
|
def load_dir(path):
|
|
assert os.path.exists(path)
|
|
x_ls = []
|
|
for file in os.listdir(path):
|
|
cur_path = os.path.join(path, file)
|
|
im = image.load_img(cur_path, target_size=(224, 224))
|
|
im = image.img_to_array(im)
|
|
x_ls.append(im)
|
|
raw_x = np.array(x_ls)
|
|
return preprocess(raw_x, 'imagenet')
|
|
|
|
|
|
def load_embeddings(feature_extractors_names):
|
|
model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
|
|
dictionaries = []
|
|
for extractor_name in feature_extractors_names:
|
|
fp = gzip.open(os.path.join(model_dir, "{}_emb.p.gz".format(extractor_name)), 'rb')
|
|
path2emb = pickle.load(fp)
|
|
fp.close()
|
|
|
|
dictionaries.append(path2emb)
|
|
|
|
merge_dict = {}
|
|
for k in dictionaries[0].keys():
|
|
cur_emb = [dic[k] for dic in dictionaries]
|
|
merge_dict[k] = np.concatenate(cur_emb)
|
|
return merge_dict
|
|
|
|
|
|
def extractor_ls_predict(feature_extractors_ls, X):
|
|
feature_ls = []
|
|
for extractor in feature_extractors_ls:
|
|
cur_features = extractor.predict(X)
|
|
feature_ls.append(cur_features)
|
|
concated_feature_ls = np.concatenate(feature_ls, axis=1)
|
|
concated_feature_ls = normalize(concated_feature_ls)
|
|
return concated_feature_ls
|
|
|
|
|
|
def pairwise_l2_distance(A, B):
|
|
BT = B.transpose()
|
|
vecProd = np.dot(A, BT)
|
|
SqA = A ** 2
|
|
sumSqA = np.matrix(np.sum(SqA, axis=1))
|
|
sumSqAEx = np.tile(sumSqA.transpose(), (1, vecProd.shape[1]))
|
|
|
|
SqB = B ** 2
|
|
sumSqB = np.sum(SqB, axis=1)
|
|
sumSqBEx = np.tile(sumSqB, (vecProd.shape[0], 1))
|
|
SqED = sumSqBEx + sumSqAEx - 2 * vecProd
|
|
SqED[SqED < 0] = 0.0
|
|
ED = np.sqrt(SqED)
|
|
return ED
|
|
|
|
|
|
def calculate_dist_score(a, b, feature_extractors_ls, metric='l2'):
|
|
features1 = extractor_ls_predict(feature_extractors_ls, a)
|
|
features2 = extractor_ls_predict(feature_extractors_ls, b)
|
|
|
|
pair_cos = pairwise_l2_distance(features1, features2)
|
|
max_sum = np.min(pair_cos, axis=0)
|
|
max_sum_arg = np.argsort(max_sum)[::-1]
|
|
max_sum_arg = max_sum_arg[:len(a)]
|
|
max_sum = [max_sum[i] for i in max_sum_arg]
|
|
paired_target_X = [b[j] for j in max_sum_arg]
|
|
paired_target_X = np.array(paired_target_X)
|
|
return np.min(max_sum), paired_target_X
|
|
|
|
|
|
def select_target_label(imgs, feature_extractors_ls, feature_extractors_names, metric='l2'):
|
|
model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
|
|
|
|
original_feature_x = extractor_ls_predict(feature_extractors_ls, imgs)
|
|
|
|
path2emb = load_embeddings(feature_extractors_names)
|
|
items = list(path2emb.items())
|
|
paths = [p[0] for p in items]
|
|
embs = [p[1] for p in items]
|
|
embs = np.array(embs)
|
|
|
|
pair_dist = pairwise_l2_distance(original_feature_x, embs)
|
|
max_sum = np.min(pair_dist, axis=0)
|
|
max_id = np.argmax(max_sum)
|
|
|
|
target_data_id = paths[int(max_id)]
|
|
image_dir = os.path.join(model_dir, "target_data/{}".format(target_data_id))
|
|
# if not os.path.exists(image_dir):
|
|
os.makedirs(os.path.join(model_dir, "target_data"), exist_ok=True)
|
|
os.makedirs(image_dir, exist_ok=True)
|
|
for i in range(10):
|
|
if os.path.exists(os.path.join(model_dir, "target_data/{}/{}.jpg".format(target_data_id, i))):
|
|
continue
|
|
get_file("{}.jpg".format(i),
|
|
"http://sandlab.cs.uchicago.edu/fawkes/files/target_data/{}/{}.jpg".format(target_data_id, i),
|
|
cache_dir=model_dir, cache_subdir='target_data/{}/'.format(target_data_id))
|
|
|
|
image_paths = glob.glob(image_dir + "/*.jpg")
|
|
|
|
target_images = [image.img_to_array(image.load_img(cur_path)) for cur_path in
|
|
image_paths]
|
|
|
|
target_images = np.array([resize(x, (224, 224)) for x in target_images])
|
|
target_images = preprocess(target_images, 'imagenet')
|
|
|
|
target_images = list(target_images)
|
|
while len(target_images) < len(imgs):
|
|
target_images += target_images
|
|
|
|
target_images = random.sample(target_images, len(imgs))
|
|
return np.array(target_images)
|
|
|
|
|
|
def get_file(fname,
|
|
origin,
|
|
untar=False,
|
|
md5_hash=None,
|
|
file_hash=None,
|
|
cache_subdir='datasets',
|
|
hash_algorithm='auto',
|
|
extract=False,
|
|
archive_format='auto',
|
|
cache_dir=None):
|
|
if cache_dir is None:
|
|
cache_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
|
|
if md5_hash is not None and file_hash is None:
|
|
file_hash = md5_hash
|
|
hash_algorithm = 'md5'
|
|
datadir_base = os.path.expanduser(cache_dir)
|
|
if not os.access(datadir_base, os.W_OK):
|
|
datadir_base = os.path.join('/tmp', '.fawkes')
|
|
datadir = os.path.join(datadir_base, cache_subdir)
|
|
_makedirs_exist_ok(datadir)
|
|
|
|
if untar:
|
|
untar_fpath = os.path.join(datadir, fname)
|
|
fpath = untar_fpath + '.tar.gz'
|
|
else:
|
|
fpath = os.path.join(datadir, fname)
|
|
|
|
download = False
|
|
if not os.path.exists(fpath):
|
|
download = True
|
|
|
|
if download:
|
|
error_msg = 'URL fetch failure on {}: {} -- {}'
|
|
dl_progress = None
|
|
try:
|
|
try:
|
|
urlretrieve(origin, fpath, dl_progress)
|
|
except HTTPError as e:
|
|
raise Exception(error_msg.format(origin, e.code, e.msg))
|
|
except URLError as e:
|
|
raise Exception(error_msg.format(origin, e.errno, e.reason))
|
|
except (Exception, KeyboardInterrupt) as e:
|
|
if os.path.exists(fpath):
|
|
os.remove(fpath)
|
|
raise
|
|
# ProgressTracker.progbar = None
|
|
|
|
if untar:
|
|
if not os.path.exists(untar_fpath):
|
|
_extract_archive(fpath, datadir, archive_format='tar')
|
|
return untar_fpath
|
|
|
|
if extract:
|
|
_extract_archive(fpath, datadir, archive_format)
|
|
|
|
return fpath
|
|
|
|
|
|
def _extract_archive(file_path, path='.', archive_format='auto'):
|
|
if archive_format is None:
|
|
return False
|
|
if archive_format == 'auto':
|
|
archive_format = ['tar', 'zip']
|
|
if isinstance(archive_format, six.string_types):
|
|
archive_format = [archive_format]
|
|
|
|
for archive_type in archive_format:
|
|
if archive_type == 'tar':
|
|
open_fn = tarfile.open
|
|
is_match_fn = tarfile.is_tarfile
|
|
if archive_type == 'zip':
|
|
open_fn = zipfile.ZipFile
|
|
is_match_fn = zipfile.is_zipfile
|
|
|
|
if is_match_fn(file_path):
|
|
with open_fn(file_path) as archive:
|
|
try:
|
|
archive.extractall(path)
|
|
except (tarfile.TarError, RuntimeError, KeyboardInterrupt):
|
|
if os.path.exists(path):
|
|
if os.path.isfile(path):
|
|
os.remove(path)
|
|
else:
|
|
shutil.rmtree(path)
|
|
raise
|
|
return True
|
|
return False
|
|
|
|
|
|
def _makedirs_exist_ok(datadir):
|
|
if six.PY2:
|
|
# Python 2 doesn't have the exist_ok arg, so we try-except here.
|
|
try:
|
|
os.makedirs(datadir)
|
|
except OSError as e:
|
|
if e.errno != errno.EEXIST:
|
|
raise
|
|
else:
|
|
os.makedirs(datadir, exist_ok=True) # pylint: disable=unexpected-keyword-arg
|