2
0
mirror of https://github.com/Shawn-Shan/fawkes.git synced 2026-06-12 21:50:46 +05:30
Former-commit-id: 268fb7e6825ddfc1165fa7adc7c216f9d61005da [formerly 06376993a831c060c337ec6e7540252f0b2dfe09]
Former-commit-id: c4812d40187a76a878e7d215d22ee84811b41896
This commit is contained in:
Shawn-Shan
2020-07-01 21:16:03 -05:00
parent 3ba2abacf2
commit 889fd933e8
19 changed files with 1647 additions and 1099 deletions
+1
View File
@@ -52,6 +52,7 @@ We shared three different feature extractors under feature_extractors/
1. low_extract.h5: trained on WebFace dataset with DenseNet architecture.
2. mid_extract.h5: VGGFace2 dataset with DenseNet architecture. Trained with PGD adversarial training for 5 epochs.
3. high_extract.h5: WebFace dataset with DenseNet architecture. Trained with PGD adversarial training for 20 epochs.
4. high2_extract.h5: VGGFace2 dataset with DenseNet architecture. Trained with PGD adversarial training for 20 epochs.
### Citation
```
+399
View File
@@ -0,0 +1,399 @@
import http.client, urllib.request, urllib.parse, urllib.error
import json
import time
#Face API Key and Endpoint
subscription_key = 'e127e26e4d534e2bad6fd9ca06145302'
uri_base = 'eastus.api.cognitive.microsoft.com'
# uri_base = 'https://shawn.cognitiveservices.azure.com/'
def detect_face(image_url):
headers = {
# Request headers
'Content-Type': 'application/json',
'Ocp-Apim-Subscription-Key': subscription_key,
}
params = urllib.parse.urlencode({
# Request parameters
'returnFaceId': 'true',
'returnFaceLandmarks': 'false',
'recognitionModel': 'recognition_01',
'returnRecognitionModel': 'false',
'detectionModel': 'detection_01',
})
body = json.dumps({
'url': image_url
})
conn = http.client.HTTPSConnection(uri_base)
conn.request("POST", "/face/v1.0/detect?%s" % params, body, headers)
response = conn.getresponse()
data = json.loads(response.read())
conn.close()
return data[0]["faceId"]
def verify_face(faceId, personGroupId, personId):
# html header
headers = {
'Content-Type': 'application/json',
'Ocp-Apim-Subscription-Key': subscription_key,
}
params = urllib.parse.urlencode({
})
# image URL
body = json.dumps({
"faceId": faceId,
"personId": personId,
"PersonGroupId": personGroupId
})
# Call Face API
conn = http.client.HTTPSConnection(uri_base)
conn.request("POST", "/face/v1.0/verify?%s" % params, body, headers)
response = conn.getresponse()
data = json.loads(response.read())
conn.close()
return data
def create_personGroupId(personGroupId, personGroupName):
headers = {
# Request headers
'Content-Type': 'application/json',
'Ocp-Apim-Subscription-Key': subscription_key,
}
params = urllib.parse.urlencode({
})
body = json.dumps({
"name": personGroupName
})
conn = http.client.HTTPSConnection(uri_base)
conn.request("PUT", "/face/v1.0/persongroups/{}?%s".format(personGroupId) % params, body, headers)
response = conn.getresponse()
data = response.read()
print(data)
conn.close()
def create_personId(personGroupId, personName):
headers = {
# Request headers
'Content-Type': 'application/json',
'Ocp-Apim-Subscription-Key': subscription_key,
}
params = urllib.parse.urlencode({
})
body = json.dumps({
"name": personName
})
conn = http.client.HTTPSConnection(uri_base)
conn.request("POST", "/face/v1.0/persongroups/{}/persons?%s".format(personGroupId) % params, body, headers)
response = conn.getresponse()
data = json.loads(response.read())
print(data)
conn.close()
return data["personId"]
def add_persistedFaceId(personGroupId, personId, image_url):
headers = {
'Content-Type': 'application/json',
'Ocp-Apim-Subscription-Key': subscription_key,
}
params = urllib.parse.urlencode({
'personGroupId': personGroupId,
'personId': personId
})
body = json.dumps({
'url': image_url
})
conn = http.client.HTTPSConnection(uri_base)
conn.request("POST", "/face/v1.0/persongroups/{}/persons/{}/persistedFaces?%s".format(personGroupId, personId) % params, body, headers)
response = conn.getresponse()
data = json.loads(response.read())
print(data)
conn.close()
return data["persistedFaceId"]
def list_personGroupPerson(personGroupId):
headers = {
'Ocp-Apim-Subscription-Key': subscription_key,
}
params = urllib.parse.urlencode({
})
body = json.dumps({})
conn = http.client.HTTPSConnection(uri_base)
conn.request("GET", "/face/v1.0/persongroups/{}/persons?%s".format(personGroupId) % params, body, headers)
response = conn.getresponse()
data = json.loads(response.read())
conn.close()
for person in data:
print(person["personId"], len(person["persistedFaceIds"]))
def get_personGroupPerson(personGroupId, personId):
headers = {
'Ocp-Apim-Subscription-Key': subscription_key,
}
params = urllib.parse.urlencode({
})
body = json.dumps({})
conn = http.client.HTTPSConnection(uri_base)
conn.request("GET", "/face/v1.0/persongroups/{}/persons/{}?%s".format(personGroupId, personId) % params, body, headers)
response = conn.getresponse()
data = json.loads(response.read())
print(data)
conn.close()
def train_personGroup(personGroupId):
headers = {
'Ocp-Apim-Subscription-Key': subscription_key,
}
params = urllib.parse.urlencode({
})
body = json.dumps({})
conn = http.client.HTTPSConnection(uri_base)
conn.request("POST", "/face/v1.0/persongroups/{}/train?%s".format(personGroupId) % params, body, headers)
response = conn.getresponse()
data = response.read()
print(data)
conn.close()
def eval(original_faceIds, personGroupId, protect_personId):
headers = {
'Content-Type': 'application/json',
'Ocp-Apim-Subscription-Key': subscription_key,
}
params = urllib.parse.urlencode({
})
body = json.dumps({
'faceIds': original_faceIds,
'personGroupId': personGroupId,
'maxNumOfCandidatesReturned': 1
})
conn = http.client.HTTPSConnection(uri_base)
conn.request("POST", "/face/v1.0/identify?%s" % params, body, headers)
response = conn.getresponse()
data = json.loads(response.read())
conn.close()
face = data[0]
if len(face["candidates"]) and face["candidates"][0]["personId"] == protect_personId:
return True
else:
return False
def delete_personGroupPerson(personGroupId, personId):
headers = {
'Ocp-Apim-Subscription-Key': subscription_key,
}
params = urllib.parse.urlencode({
})
body = json.dumps({})
conn = http.client.HTTPSConnection(uri_base)
conn.request("DELETE", "/face/v1.0/persongroups/{}/persons/{}?%s".format(personGroupId, personId) % params, body, headers)
response = conn.getresponse()
data = response.read()
print(data)
conn.close()
def add_protect_person(personGroupId, name):
personId = create_personId(personGroupId, name)
for idx in range(72):
cloaked_image_url = "https://super.cs.uchicago.edu/~shawn/cloaked/{}_c.png".format(idx)
add_persistedFaceId(personGroupId, personId, cloaked_image_url)
def add_sybil_person(personGroupId, name):
personId = create_personId(personGroupId, name)
for idx in range(82):
try:
cloaked_image_url = "https://super.cs.uchicago.edu/~shawn/sybils/{}_c.png".format(idx)
add_persistedFaceId(personGroupId, personId, cloaked_image_url)
except:
print(idx)
def add_other_person(personGroupId):
for idx_person in range(65):
personId = create_personId(personGroupId, str(idx_person))
for idx_image in range(90):
try:
image_url = "https://super.cs.uchicago.edu/~shawn/train/{}/{}.png".format(idx_person, idx_image)
add_persistedFaceId(personGroupId, personId, image_url)
except:
print(idx_person, idx_image)
def get_trainStatus(personGroupId):
headers = {
'Ocp-Apim-Subscription-Key': subscription_key,
}
params = urllib.parse.urlencode({
})
body = json.dumps({})
conn = http.client.HTTPSConnection(uri_base)
conn.request("GET", "/face/v1.0/persongroups/{}/training?%s".format(personGroupId) % params, body, headers)
response = conn.getresponse()
data = response.read()
print(data)
conn.close()
def test_original():
personGroupId = 'pubfig'
# create_personGroupId(personGroupId, 'pubfig')
# add protect person
protect_personId = 'd3df3012-6f3f-4c1b-b86d-55e91a352e01'
#protect_personId = create_personId(personGroupId, 'Emily')
#for idx in range(50):
# image_url = "https://super.cs.uchicago.edu/~shawn/cloaked/{}_o.png".format(idx)
# add_persistedFaceId(personGroupId, protect_personId, image_url)
# add other people
#for idx_person in range(65):
# personId = create_personId(personGroupId, str(idx_person))
# for idx_image in range(50):
# try:
# image_url = "https://super.cs.uchicago.edu/~shawn/train/{}/{}.png".format(idx_person, idx_image)
# add_persistedFaceId(personGroupId, personId, image_url)
# except:
# print(idx_person, idx_image)
# train model based on personGroup
#train_personGroup(personGroupId)
#time.sleep(3)
#get_trainStatus(personGroupId)
#list_personGroupPerson(personGroupId)
idx_range = range(50, 82)
acc = 0.
for idx in idx_range:
original_image_url = "https://super.cs.uchicago.edu/~shawn/cloaked/{}_o.png".format(idx)
faceId = detect_face(original_image_url)
original_faceIds = [faceId]
# verify
res = eval(original_faceIds, personGroupId, protect_personId)
if res:
acc += 1.
acc /= len(idx_range)
print(acc) # 1.0
def list_personGroups():
headers = {
'Ocp-Apim-Subscription-Key': subscription_key,
}
params = urllib.parse.urlencode({
})
body = json.dumps({})
conn = http.client.HTTPSConnection(uri_base)
conn.request("GET", "/face/v1.0/persongroups?%s" % params, body, headers)
response = conn.getresponse()
data = response.read()
print(data)
conn.close()
def delete_personGroup(personGroupId):
headers = {
'Ocp-Apim-Subscription-Key': subscription_key,
}
params = urllib.parse.urlencode({
})
body = json.dumps({})
conn = http.client.HTTPSConnection(uri_base)
conn.request("DELETE", "/face/v1.0/persongroups/{}?%s".format(personGroupId) % params, body, headers)
response = conn.getresponse()
data = response.read()
print(data)
conn.close()
def main():
# delete_personGroup('cloaking')
# delete_personGroup('cloaking-emily')
# delete_personGroup('pubfig')
# list_personGroups()
# exit()
personGroupId = 'cloaking'
# create_personGroupId(personGroupId, 'cloaking')
list_personGroups()
exit()
#delete_personGroupPerson(personGroupId, '0ac606cd-24b3-440f-866a-31adf2a1b446')
#add_protect_person(personGroupId, 'Emily')
#personId = create_personId(personGroupId, 'Emily')
#add_sybil_person(personGroupId, 'sybil')
protect_personId = '6c5a71eb-f39a-4570-b3f5-72cca3ab5a6b'
#delete_personGroupPerson(personGroupId, protect_personId)
#add_protect_person(personGroupId, 'Emily')
# train model based on personGroup
#train_personGroup(personGroupId)
get_trainStatus(personGroupId)
#add_other_person(personGroupId)
#list_personGroupPerson(personGroupId)
#delete_personGroupPerson(personGroupId, '80e32c80-bc69-416a-9dff-c8d42d7a3301')
idx_range = range(72, 82)
original_faceIds = []
for idx in idx_range:
original_image_url = "https://super.cs.uchicago.edu/~shawn/cloaked/{}_o.png".format(idx)
faceId = detect_face(original_image_url)
original_faceIds.append(faceId)
# verify
eval(original_faceIds, personGroupId, protect_personId)
if __name__ == '__main__':
main()
+3 -2
View File
@@ -4,7 +4,7 @@ import os
DATASETS = {
"pubfig": "../data/pubfig",
"scrub": "/home/shansixioing/cloak/fawkes/data/scrub/",
"scrub": "/home/shansixioing/fawkes/data/scrub/",
"vggface2": "/mnt/data/sixiongshan/data/vggface2/",
"webface": "/mnt/data/sixiongshan/data/webface/",
"youtubeface": "/mnt/data/sixiongshan/data/youtubeface/keras_flow_data/",
@@ -32,7 +32,8 @@ def main():
"num_images": num_images}
print("Successfully config {}".format(dataset))
j = json.dumps(config)
with open("config.json", "wb") as f:
model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
with open(os.path.join(model_dir, "config.json"), "wb") as f:
f.write(j.encode())
-430
View File
@@ -1,430 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date : 2020-05-17
# @Author : Shawn Shan (shansixiong@cs.uchicago.edu)
# @Link : https://www.shawnshan.com/
import datetime
import time
from decimal import Decimal
import numpy as np
import tensorflow as tf
from utils import preprocess, reverse_preprocess
class FawkesMaskGeneration:
# if the attack is trying to mimic a target image or a neuron vector
MIMIC_IMG = True
# number of iterations to perform gradient descent
MAX_ITERATIONS = 10000
# larger values converge faster to less accurate results
LEARNING_RATE = 1e-2
# the initial constant c to pick as a first guess
INITIAL_CONST = 1
# pixel intensity range
INTENSITY_RANGE = 'imagenet'
# threshold for distance
L_THRESHOLD = 0.03
# whether keep the final result or the best result
KEEP_FINAL = False
# max_val of image
MAX_VAL = 255
# The following variables are used by DSSIM, should keep as default
# filter size in SSIM
FILTER_SIZE = 11
# filter sigma in SSIM
FILTER_SIGMA = 1.5
# weights used in MS-SSIM
SCALE_WEIGHTS = None
MAXIMIZE = False
IMAGE_SHAPE = (224, 224, 3)
RATIO = 1.0
LIMIT_DIST = False
def __init__(self, sess, bottleneck_model_ls, mimic_img=MIMIC_IMG,
batch_size=1, learning_rate=LEARNING_RATE,
max_iterations=MAX_ITERATIONS, initial_const=INITIAL_CONST,
intensity_range=INTENSITY_RANGE, l_threshold=L_THRESHOLD,
max_val=MAX_VAL, keep_final=KEEP_FINAL, maximize=MAXIMIZE, image_shape=IMAGE_SHAPE,
verbose=0, ratio=RATIO, limit_dist=LIMIT_DIST):
assert intensity_range in {'raw', 'imagenet', 'inception', 'mnist'}
# constant used for tanh transformation to avoid corner cases
self.tanh_constant = 2 - 1e-6
self.sess = sess
self.MIMIC_IMG = mimic_img
self.LEARNING_RATE = learning_rate
self.MAX_ITERATIONS = max_iterations
self.initial_const = initial_const
self.batch_size = batch_size
self.intensity_range = intensity_range
self.l_threshold = l_threshold
self.max_val = max_val
self.keep_final = keep_final
self.verbose = verbose
self.maximize = maximize
self.learning_rate = learning_rate
self.ratio = ratio
self.limit_dist = limit_dist
self.single_shape = list(image_shape)
self.input_shape = tuple([self.batch_size] + self.single_shape)
self.bottleneck_shape = tuple([self.batch_size] + self.single_shape)
# the variable we're going to optimize over
self.modifier = tf.Variable(np.zeros(self.input_shape, dtype=np.float32))
# target image in tanh space
if self.MIMIC_IMG:
self.timg_tanh = tf.Variable(np.zeros(self.input_shape), dtype=np.float32)
else:
self.bottleneck_t_raw = tf.Variable(np.zeros(self.bottleneck_shape), dtype=np.float32)
# source image in tanh space
self.simg_tanh = tf.Variable(np.zeros(self.input_shape), dtype=np.float32)
self.const = tf.Variable(np.ones(batch_size), dtype=np.float32)
self.mask = tf.Variable(np.ones((batch_size), dtype=np.bool))
self.weights = tf.Variable(np.ones(self.bottleneck_shape,
dtype=np.float32))
# and here's what we use to assign them
self.assign_modifier = tf.placeholder(tf.float32, self.input_shape)
if self.MIMIC_IMG:
self.assign_timg_tanh = tf.placeholder(
tf.float32, self.input_shape)
else:
self.assign_bottleneck_t_raw = tf.placeholder(
tf.float32, self.bottleneck_shape)
self.assign_simg_tanh = tf.placeholder(tf.float32, self.input_shape)
self.assign_const = tf.placeholder(tf.float32, (batch_size))
self.assign_mask = tf.placeholder(tf.bool, (batch_size))
self.assign_weights = tf.placeholder(tf.float32, self.bottleneck_shape)
# the resulting image, tanh'd to keep bounded from -0.5 to 0.5
# adversarial image in raw space
self.aimg_raw = (tf.tanh(self.modifier + self.simg_tanh) /
self.tanh_constant +
0.5) * 255.0
# source image in raw space
self.simg_raw = (tf.tanh(self.simg_tanh) /
self.tanh_constant +
0.5) * 255.0
if self.MIMIC_IMG:
# target image in raw space
self.timg_raw = (tf.tanh(self.timg_tanh) /
self.tanh_constant +
0.5) * 255.0
# convert source and adversarial image into input space
if self.intensity_range == 'imagenet':
mean = tf.constant(np.repeat([[[[103.939, 116.779, 123.68]]]], self.batch_size, axis=0), dtype=tf.float32,
name='img_mean')
self.aimg_input = (self.aimg_raw[..., ::-1] - mean)
self.simg_input = (self.simg_raw[..., ::-1] - mean)
if self.MIMIC_IMG:
self.timg_input = (self.timg_raw[..., ::-1] - mean)
elif self.intensity_range == 'raw':
self.aimg_input = self.aimg_raw
self.simg_input = self.simg_raw
if self.MIMIC_IMG:
self.timg_input = self.timg_raw
def batch_gen_DSSIM(aimg_raw_split, simg_raw_split):
msssim_split = tf.image.ssim(aimg_raw_split, simg_raw_split, max_val=255.0)
dist = (1.0 - tf.stack(msssim_split)) / 2.0
return dist
# raw value of DSSIM distance
self.dist_raw = batch_gen_DSSIM(self.aimg_raw, self.simg_raw)
# distance value after applying threshold
self.dist = tf.maximum(self.dist_raw - self.l_threshold, 0.0)
self.dist_raw_sum = tf.reduce_sum(
tf.where(self.mask,
self.dist_raw,
tf.zeros_like(self.dist_raw)))
self.dist_sum = tf.reduce_sum(tf.where(self.mask, self.dist, tf.zeros_like(self.dist)))
def resize_tensor(input_tensor, model_input_shape):
if input_tensor.shape[1:] == model_input_shape or model_input_shape[1] is None:
return input_tensor
resized_tensor = tf.image.resize(input_tensor, model_input_shape[:2])
return resized_tensor
def calculate_direction(bottleneck_model, cur_timg_input, cur_simg_input):
target_features = bottleneck_model(cur_timg_input)
return target_features
# target_center = tf.reduce_mean(target_features, axis=0)
# original = bottleneck_model(cur_simg_input)
# original_center = tf.reduce_mean(original, axis=0)
# direction = target_center - original_center
# final_target = original + self.ratio * direction
# return final_target
self.bottlesim = 0.0
self.bottlesim_sum = 0.0
self.bottlesim_push = 0.0
for bottleneck_model in bottleneck_model_ls:
model_input_shape = bottleneck_model.input_shape[1:]
cur_aimg_input = resize_tensor(self.aimg_input, model_input_shape)
self.bottleneck_a = bottleneck_model(cur_aimg_input)
if self.MIMIC_IMG:
# cur_timg_input = resize_tensor(self.timg_input, model_input_shape)
# cur_simg_input = resize_tensor(self.simg_input, model_input_shape)
cur_timg_input = self.timg_input
cur_simg_input = self.simg_input
self.bottleneck_t = calculate_direction(bottleneck_model, cur_timg_input, cur_simg_input)
# self.bottleneck_t = bottleneck_model(cur_timg_input)
else:
self.bottleneck_t = self.bottleneck_t_raw
bottleneck_diff = self.bottleneck_t - self.bottleneck_a
scale_factor = tf.sqrt(tf.reduce_sum(tf.square(self.bottleneck_t), axis=1))
cur_bottlesim = tf.sqrt(tf.reduce_sum(tf.square(bottleneck_diff), axis=1))
cur_bottlesim = cur_bottlesim / scale_factor
cur_bottlesim_sum = tf.reduce_sum(cur_bottlesim)
self.bottlesim += cur_bottlesim
# self.bottlesim_push += cur_bottlesim_push_sum
self.bottlesim_sum += cur_bottlesim_sum
# sum up the losses
if self.maximize:
self.loss = self.const * tf.square(self.dist) - self.bottlesim
else:
self.loss = self.const * tf.square(self.dist) + self.bottlesim
self.loss_sum = tf.reduce_sum(tf.where(self.mask, self.loss, tf.zeros_like(self.loss)))
# Setup the Adadelta optimizer and keep track of variables
# we're creating
start_vars = set(x.name for x in tf.global_variables())
self.learning_rate_holder = tf.placeholder(tf.float32, shape=[])
optimizer = tf.train.AdadeltaOptimizer(self.learning_rate_holder)
self.train = optimizer.minimize(self.loss_sum,
var_list=[self.modifier])
end_vars = tf.global_variables()
new_vars = [x for x in end_vars if x.name not in start_vars]
# these are the variables to initialize when we run
self.setup = []
self.setup.append(self.modifier.assign(self.assign_modifier))
if self.MIMIC_IMG:
self.setup.append(self.timg_tanh.assign(self.assign_timg_tanh))
else:
self.setup.append(self.bottleneck_t_raw.assign(
self.assign_bottleneck_t_raw))
self.setup.append(self.simg_tanh.assign(self.assign_simg_tanh))
self.setup.append(self.const.assign(self.assign_const))
self.setup.append(self.mask.assign(self.assign_mask))
self.setup.append(self.weights.assign(self.assign_weights))
self.init = tf.variables_initializer(var_list=[self.modifier] + new_vars)
print('Attacker loaded')
def preprocess_arctanh(self, imgs):
imgs = reverse_preprocess(imgs, self.intensity_range)
imgs /= 255.0
imgs -= 0.5
imgs *= self.tanh_constant
tanh_imgs = np.arctanh(imgs)
return tanh_imgs
def clipping(self, imgs):
imgs = reverse_preprocess(imgs, self.intensity_range)
imgs = np.clip(imgs, 0, self.max_val)
imgs = np.rint(imgs)
imgs = preprocess(imgs, self.intensity_range)
return imgs
def attack(self, source_imgs, target_imgs, weights=None):
if weights is None:
weights = np.ones([source_imgs.shape[0]] +
list(self.bottleneck_shape[1:]))
assert weights.shape[1:] == self.bottleneck_shape[1:]
assert source_imgs.shape[1:] == self.input_shape[1:]
assert source_imgs.shape[0] == weights.shape[0]
if self.MIMIC_IMG:
assert target_imgs.shape[1:] == self.input_shape[1:]
assert source_imgs.shape[0] == target_imgs.shape[0]
else:
assert target_imgs.shape[1:] == self.bottleneck_shape[1:]
assert source_imgs.shape[0] == target_imgs.shape[0]
start_time = time.time()
adv_imgs = []
print('%d batches in total'
% int(np.ceil(len(source_imgs) / self.batch_size)))
for idx in range(0, len(source_imgs), self.batch_size):
print('processing batch %d at %s' % (idx, datetime.datetime.now()))
adv_img = self.attack_batch(source_imgs[idx:idx + self.batch_size],
target_imgs[idx:idx + self.batch_size],
weights[idx:idx + self.batch_size])
adv_imgs.extend(adv_img)
elapsed_time = time.time() - start_time
print('attack cost %f s' % (elapsed_time))
return np.array(adv_imgs)
def attack_batch(self, source_imgs, target_imgs, weights):
"""
Run the attack on a batch of images and labels.
"""
LR = self.learning_rate
nb_imgs = source_imgs.shape[0]
mask = [True] * nb_imgs + [False] * (self.batch_size - nb_imgs)
mask = np.array(mask, dtype=np.bool)
source_imgs = np.array(source_imgs)
target_imgs = np.array(target_imgs)
# convert to tanh-space
simg_tanh = self.preprocess_arctanh(source_imgs)
if self.MIMIC_IMG:
timg_tanh = self.preprocess_arctanh(target_imgs)
else:
timg_tanh = target_imgs
CONST = np.ones(self.batch_size) * self.initial_const
self.sess.run(self.init)
simg_tanh_batch = np.zeros(self.input_shape)
if self.MIMIC_IMG:
timg_tanh_batch = np.zeros(self.input_shape)
else:
timg_tanh_batch = np.zeros(self.bottleneck_shape)
weights_batch = np.zeros(self.bottleneck_shape)
simg_tanh_batch[:nb_imgs] = simg_tanh[:nb_imgs]
timg_tanh_batch[:nb_imgs] = timg_tanh[:nb_imgs]
weights_batch[:nb_imgs] = weights[:nb_imgs]
modifier_batch = np.ones(self.input_shape) * 1e-6
# set the variables so that we don't have to send them over again
if self.MIMIC_IMG:
self.sess.run(self.setup,
{self.assign_timg_tanh: timg_tanh_batch,
self.assign_simg_tanh: simg_tanh_batch,
self.assign_const: CONST,
self.assign_mask: mask,
self.assign_weights: weights_batch,
self.assign_modifier: modifier_batch})
else:
# if directly mimicking a vector, use assign_bottleneck_t_raw
# in setup
self.sess.run(self.setup,
{self.assign_bottleneck_t_raw: timg_tanh_batch,
self.assign_simg_tanh: simg_tanh_batch,
self.assign_const: CONST,
self.assign_mask: mask,
self.assign_weights: weights_batch,
self.assign_modifier: modifier_batch})
best_bottlesim = [0] * nb_imgs if self.maximize else [np.inf] * nb_imgs
best_adv = np.zeros_like(source_imgs)
if self.verbose == 1:
loss_sum = float(self.sess.run(self.loss_sum))
dist_sum = float(self.sess.run(self.dist_sum))
thresh_over = (dist_sum / self.batch_size / self.l_threshold * 100)
dist_raw_sum = float(self.sess.run(self.dist_raw_sum))
bottlesim_sum = self.sess.run(self.bottlesim_sum)
print('START: Total loss: %.4E; perturb: %.6f (%.2f%% over, raw: %.6f); sim: %f'
% (Decimal(loss_sum),
dist_sum,
thresh_over,
dist_raw_sum,
bottlesim_sum / nb_imgs))
try:
total_distance = [0] * nb_imgs
if self.limit_dist:
dist_raw_list, bottlesim_list, aimg_input_list = self.sess.run(
[self.dist_raw,
self.bottlesim,
self.aimg_input])
for e, (dist_raw, bottlesim, aimg_input) in enumerate(
zip(dist_raw_list, bottlesim_list, aimg_input_list)):
if e >= nb_imgs:
break
total_distance[e] = bottlesim
for iteration in range(self.MAX_ITERATIONS):
self.sess.run([self.train], feed_dict={self.learning_rate_holder: LR})
dist_raw_list, bottlesim_list, aimg_input_list = self.sess.run(
[self.dist_raw,
self.bottlesim,
self.aimg_input])
for e, (dist_raw, bottlesim, aimg_input) in enumerate(
zip(dist_raw_list, bottlesim_list, aimg_input_list)):
if e >= nb_imgs:
break
if (bottlesim < best_bottlesim[e] and bottlesim > total_distance[e] * 0.1 and (
not self.maximize)) or (
bottlesim > best_bottlesim[e] and self.maximize):
best_bottlesim[e] = bottlesim
best_adv[e] = aimg_input
if iteration != 0 and iteration % (self.MAX_ITERATIONS // 3) == 0:
LR = LR / 2
print("Learning Rate: ", LR)
if iteration % (self.MAX_ITERATIONS // 10) == 0:
if self.verbose == 1:
loss_sum = float(self.sess.run(self.loss_sum))
dist_sum = float(self.sess.run(self.dist_sum))
thresh_over = (dist_sum /
self.batch_size /
self.l_threshold *
100)
dist_raw_sum = float(self.sess.run(self.dist_raw_sum))
bottlesim_sum = self.sess.run(self.bottlesim_sum)
print('ITER %4d: Total loss: %.4E; perturb: %.6f (%.2f%% over, raw: %.6f); sim: %f'
% (iteration,
Decimal(loss_sum),
dist_sum,
thresh_over,
dist_raw_sum,
bottlesim_sum / nb_imgs))
except KeyboardInterrupt:
pass
if self.verbose == 1:
loss_sum = float(self.sess.run(self.loss_sum))
dist_sum = float(self.sess.run(self.dist_sum))
thresh_over = (dist_sum / self.batch_size / self.l_threshold * 100)
dist_raw_sum = float(self.sess.run(self.dist_raw_sum))
bottlesim_sum = float(self.sess.run(self.bottlesim_sum))
print('END: Total loss: %.4E; perturb: %.6f (%.2f%% over, raw: %.6f); sim: %f'
% (Decimal(loss_sum),
dist_sum,
thresh_over,
dist_raw_sum,
bottlesim_sum / nb_imgs))
best_adv = self.clipping(best_adv[:nb_imgs])
return best_adv
+83 -71
View File
@@ -1,25 +1,18 @@
import argparse
import os
import sys
sys.path.append("/home/shansixioing/tools/")
sys.path.append("/home/shansixioing/cloak/")
import argparse
from tensorflow import set_random_seed
from utils import init_gpu, load_extractor, load_victim_model, dump_dictionary_as_json
import os
import numpy as np
sys.path.append("/home/shansixioing/fawkes/fawkes")
from utils import extract_faces, get_dataset_path, init_gpu, load_extractor, load_victim_model
import random
import pickle
import re
import glob
from keras.preprocessing import image
from keras.utils import to_categorical
from keras.applications.vgg16 import preprocess_input
# import locale
#
# loc = locale.getlocale()
# locale.setlocale(locale.LC_ALL, loc)
def select_samples(data_dir):
all_data_path = []
@@ -27,43 +20,49 @@ def select_samples(data_dir):
cls_dir = os.path.join(data_dir, cls)
for data_path in os.listdir(cls_dir):
all_data_path.append(os.path.join(cls_dir, data_path))
return all_data_path
def generator_wrap(cloak_data, n_classes, test=False, validation_split=0.1):
if test:
all_data_path = select_samples(cloak_data.test_data_dir)
else:
all_data_path = select_samples(cloak_data.train_data_dir)
split = int(len(cloak_data.cloaked_protect_train_X) * (1 - validation_split))
cloaked_train_X = cloak_data.cloaked_protect_train_X[:split]
np.random.seed(12345)
def generator_wrap(protect_images, test=False, validation_split=0.1):
train_data_dir, test_data_dir, num_classes, num_images = get_dataset_path(args.dataset)
# all_vals = list(cloak_data.path2idx.items())
idx = 0
path2class = {}
path2imgs_list = {}
for target_path in sorted(glob.glob(train_data_dir + "/*")):
path2class[target_path] = idx
path2imgs_list[target_path] = glob.glob(os.path.join(target_path, "*"))
idx += 1
if idx >= args.num_classes:
break
path2class["protected"] = idx
np.random.seed(12345)
while True:
batch_X = []
batch_Y = []
cur_batch_path = np.random.choice(all_data_path, args.batch_size)
cur_batch_path = np.random.choice(list(path2class.keys()), args.batch_size)
for p in cur_batch_path:
# p = p.encode("utf-8").decode("ascii", 'ignore')
cur_y = cloak_data.path2idx[p]
# protect class and sybil class do not need to appear in test dataset
if test and (re.search(cloak_data.protect_class, p)):
cur_y = path2class[p]
if test and p == 'protected':
continue
# protect class images in train dataset
elif p in cloak_data.protect_class_path:
cur_x = random.choice(cloaked_train_X)
elif p == 'protected':
cur_x = random.choice(protect_images)
else:
im = image.load_img(p, target_size=cloak_data.img_shape)
im = image.img_to_array(im)
cur_x = preprocess_input(im)
cur_path = random.choice(path2imgs_list[p])
im = image.load_img(cur_path, target_size=(224, 224))
cur_x = image.img_to_array(im)
cur_x = preprocess_input(cur_x)
batch_X.append(cur_x)
batch_Y.append(cur_y)
batch_X = np.array(batch_X)
batch_Y = to_categorical(np.array(batch_Y), num_classes=n_classes)
batch_Y = to_categorical(np.array(batch_Y), num_classes=args.num_classes + 1)
yield batch_X, batch_Y
@@ -87,51 +86,59 @@ def eval_cloaked_test_data(cloak_data, n_classes, validation_split=0.1):
def main():
init_gpu(args.gpu)
if args.dataset == 'pubfig':
N_CLASSES = 65
CLOAK_DIR = args.cloak_data
elif args.dataset == 'scrub':
N_CLASSES = 530
CLOAK_DIR = args.cloak_data
else:
raise ValueError
CLOAK_DIR = os.path.join("../results", CLOAK_DIR)
RES = pickle.load(open(os.path.join(CLOAK_DIR, "cloak_data.p"), 'rb'))
#
# if args.dataset == 'pubfig':
# N_CLASSES = 65
# CLOAK_DIR = args.cloak_data
# elif args.dataset == 'scrub':
# N_CLASSES = 530
# CLOAK_DIR = args.cloak_data
# else:
# raise ValueError
print("Build attacker's model")
cloak_data = RES['cloak_data']
EVAL_RES = {}
train_generator = generator_wrap(cloak_data, n_classes=N_CLASSES,
image_paths = glob.glob(os.path.join(args.directory, "*"))
original_image_paths = sorted([path for path in image_paths if "_cloaked" not in path.split("/")[-1]])
protect_image_paths = sorted([path for path in image_paths if "_cloaked" in path.split("/")[-1]])
original_imgs = np.array([extract_faces(image.img_to_array(image.load_img(cur_path))) for cur_path in
original_image_paths[:150]])
original_y = to_categorical([args.num_classes] * len(original_imgs), num_classes=args.num_classes + 1)
protect_imgs = [extract_faces(image.img_to_array(image.load_img(cur_path))) for cur_path in
protect_image_paths]
train_generator = generator_wrap(protect_imgs,
validation_split=args.validation_split)
test_generator = generator_wrap(cloak_data, test=True, n_classes=N_CLASSES,
test_generator = generator_wrap(protect_imgs, test=True,
validation_split=args.validation_split)
EVAL_RES['transfer_model'] = args.transfer_model
base_model = load_extractor(args.transfer_model)
model = load_victim_model(teacher_model=base_model, number_classes=N_CLASSES)
model = load_victim_model(teacher_model=base_model, number_classes=args.num_classes + 1)
original_X, original_Y = eval_uncloaked_test_data(cloak_data, N_CLASSES)
cloaked_test_X, cloaked_test_Y = eval_cloaked_test_data(cloak_data, N_CLASSES,
validation_split=args.validation_split)
# cloaked_test_X, cloaked_test_Y = eval_cloaked_test_data(cloak_data, args.num_classes,
# validation_split=args.validation_split)
try:
model.fit_generator(train_generator, steps_per_epoch=cloak_data.number_samples // 32,
validation_data=(original_X, original_Y), epochs=args.n_epochs, verbose=2,
use_multiprocessing=False, workers=1)
except KeyboardInterrupt:
pass
# try:
train_data_dir, test_data_dir, num_classes, num_images = get_dataset_path(args.dataset)
model.fit_generator(train_generator, steps_per_epoch=num_images // 32,
validation_data=(original_imgs, original_y),
epochs=args.n_epochs,
verbose=1,
use_multiprocessing=True, workers=5)
# except KeyboardInterrupt:
# pass
_, acc_original = model.evaluate(original_X, original_Y, verbose=0)
_, acc_original = model.evaluate(original_imgs, original_y, verbose=0)
print("Accuracy on uncloaked/original images TEST: {:.4f}".format(acc_original))
EVAL_RES['acc_original'] = acc_original
# EVAL_RES['acc_original'] = acc_original
_, other_acc = model.evaluate_generator(test_generator, verbose=0, steps=50)
print("Accuracy on other classes {:.4f}".format(other_acc))
EVAL_RES['other_acc'] = other_acc
dump_dictionary_as_json(EVAL_RES, os.path.join(CLOAK_DIR, "eval_seed{}.json".format(args.seed_idx)))
# EVAL_RES['other_acc'] = other_acc
# dump_dictionary_as_json(EVAL_RES, os.path.join(CLOAK_DIR, "eval_seed{}.json".format(args.seed_idx)))
def parse_arguments(argv):
@@ -139,16 +146,21 @@ def parse_arguments(argv):
parser.add_argument('--gpu', type=str,
help='GPU id', default='0')
parser.add_argument('--dataset', type=str,
help='name of dataset', default='scrub')
parser.add_argument('--cloak_data', type=str,
parser.add_argument('--num_classes', type=int,
help='name of dataset', default=520)
parser.add_argument('--directory', '-d', type=str,
help='name of the cloak result directory',
default='scrub_webface_dense_robust_extract_protectPatrick_Dempsey')
default='img/')
parser.add_argument('--transfer_model', type=str,
help='the feature extractor used for tracker model training. It can be the same or not same as the user\'s', default='vggface2_inception_extract')
help='the feature extractor used for tracker model training. ', default='low_extract')
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--validation_split', type=float, default=0.1)
parser.add_argument('--n_epochs', type=int, default=5)
parser.add_argument('--n_epochs', type=int, default=3)
return parser.parse_args(argv)
+26 -15
View File
@@ -1,4 +1,5 @@
import argparse
import glob
import os
import pickle
import random
@@ -7,8 +8,9 @@ import sys
import numpy as np
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing import image
from utils import load_extractor, get_dataset_path
sys.path.append("../fawkes")
# from utils import load_extractor
import keras
def load_sample_dir(path, sample=10):
x_ls = []
@@ -30,21 +32,26 @@ def normalize(x):
def main():
extractor = load_extractor(args.feature_extractor)
extractor = keras.models.load_model(args.feature_extractor)
path2emb = {}
for target_dataset in args.candidate_datasets:
target_dataset_path, _, _, _ = get_dataset_path(target_dataset)
for target_class in os.listdir(target_dataset_path):
target_class_path = os.path.join(target_dataset_path, target_class)
target_X = load_sample_dir(target_class_path)
cur_feature = extractor.predict(target_X)
cur_feature = np.mean(cur_feature, axis=0)
path2emb[target_class_path] = cur_feature
model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
for path in glob.glob(os.path.join(model_dir, "target_data/*")):
print(path)
idx = int(path.split("/")[-1])
cur_image_paths = glob.glob(os.path.join(path, "*"))
imgs = np.array([image.img_to_array(image.load_img(p, target_size=(224, 224))) for p in cur_image_paths])
imgs = preprocess_input(imgs)
for k, v in path2emb.items():
path2emb[k] = normalize(v)
cur_feature = extractor.predict(imgs)
cur_feature = np.mean(cur_feature, axis=0)
path2emb[idx] = cur_feature
model_path = os.path.join(model_dir, "{}_extract.h5".format(args.feature_extractor_name))
emb_path = os.path.join(model_dir, "{}_emb.p".format(args.feature_extractor_name))
extractor.save(model_path)
pickle.dump(path2emb, open(emb_path, "wb"))
pickle.dump(path2emb, open("../feature_extractors/embeddings/{}_emb.p".format(args.feature_extractor), "wb"))
def parse_arguments(argv):
@@ -54,8 +61,12 @@ def parse_arguments(argv):
parser.add_argument('--candidate-datasets', nargs='+',
help='path candidate datasets')
parser.add_argument('--feature-extractor', type=str,
help="path of the feature extractor used for optimization",
default="/home/shansixioing/fawkes/feature_extractors/high2_extract.h5")
parser.add_argument('--feature-extractor-name', type=str,
help="name of the feature extractor used for optimization",
default="webface_dense_robust_extract")
default="high2")
return parser.parse_args(argv)
-95
View File
@@ -1,95 +0,0 @@
import argparse
import os
import pickle
import random
import sys
import numpy as np
from differentiator import FawkesMaskGeneration
from tensorflow import set_random_seed
from utils import load_extractor, CloakData, init_gpu
random.seed(12243)
np.random.seed(122412)
set_random_seed(12242)
NUM_IMG_PROTECTED = 400 # Number of images used to optimize the target class
BATCH_SIZE = 32
MAX_ITER = 1000
def diff_protected_data(sess, feature_extractors_ls, image_X, number_protect, target_X=None, th=0.01):
image_X = image_X[:number_protect]
differentiator = FawkesMaskGeneration(sess, feature_extractors_ls,
batch_size=BATCH_SIZE,
mimic_img=True,
intensity_range='imagenet',
initial_const=args.sd,
learning_rate=args.lr,
max_iterations=MAX_ITER,
l_threshold=th,
verbose=1, maximize=False, keep_final=False, image_shape=image_X.shape[1:])
if len(target_X) < len(image_X):
target_X = np.concatenate([target_X, target_X, target_X])
target_X = target_X[:len(image_X)]
cloaked_image_X = differentiator.attack(image_X, target_X)
return cloaked_image_X
def perform_defense():
RES = {}
sess = init_gpu(args.gpu)
FEATURE_EXTRACTORS = [args.feature_extractor]
RES_DIR = '../results/'
RES['num_img_protected'] = NUM_IMG_PROTECTED
RES['extractors'] = FEATURE_EXTRACTORS
num_protect = NUM_IMG_PROTECTED
print("Loading {} for optimization".format(args.feature_extractor))
feature_extractors_ls = [load_extractor(name) for name in FEATURE_EXTRACTORS]
protect_class = args.protect_class
cloak_data = CloakData(args.dataset, protect_class=protect_class)
RES_FILE_NAME = "{}_{}_protect{}".format(args.dataset, args.feature_extractor, cloak_data.protect_class)
RES_FILE_NAME = os.path.join(RES_DIR, RES_FILE_NAME)
print("Protect Class: ", cloak_data.protect_class)
cloak_data.target_path, cloak_data.target_data = cloak_data.select_target_label(feature_extractors_ls,
FEATURE_EXTRACTORS)
os.makedirs(RES_DIR, exist_ok=True)
os.makedirs(RES_FILE_NAME, exist_ok=True)
cloak_image_X = diff_protected_data(sess, feature_extractors_ls, cloak_data.protect_train_X,
number_protect=num_protect,
target_X=cloak_data.target_data, th=args.th)
cloak_data.cloaked_protect_train_X = cloak_image_X
RES['cloak_data'] = cloak_data
pickle.dump(RES, open(os.path.join(RES_FILE_NAME, 'cloak_data.p'), "wb"))
def parse_arguments(argv):
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', type=str,
help='GPU id', default='0')
parser.add_argument('--dataset', type=str,
help='name of dataset', default='scrub')
parser.add_argument('--feature-extractor', type=str,
help="name of the feature extractor used for optimization",
default="webface_dense_robust_extract")
parser.add_argument('--th', type=float, default=0.007)
parser.add_argument('--sd', type=int, default=1e9)
parser.add_argument('--protect_class', type=str, default=None)
parser.add_argument('--lr', type=float, default=1)
return parser.parse_args(argv)
if __name__ == '__main__':
args = parse_arguments(sys.argv[1:])
perform_defense()
-373
View File
@@ -1,373 +0,0 @@
import json
import os
import pickle
import random
import keras
import keras.backend as K
import numpy as np
import tensorflow as tf
from keras.applications.vgg16 import preprocess_input
from keras.layers import Dense, Activation
from keras.models import Model
from keras.preprocessing import image
from keras.utils import to_categorical
from sklearn.metrics import pairwise_distances
# from keras.utils import get_file
def clip_img(X, preprocessing='raw'):
X = reverse_preprocess(X, preprocessing)
X = np.clip(X, 0.0, 255.0)
X = preprocess(X, preprocessing)
return X
def dump_dictionary_as_json(dict, outfile):
j = json.dumps(dict)
with open(outfile, "wb") as f:
f.write(j.encode())
def fix_gpu_memory(mem_fraction=1):
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_fraction)
tf_config = tf.ConfigProto(gpu_options=gpu_options)
tf_config.gpu_options.allow_growth = True
tf_config.log_device_placement = False
init_op = tf.global_variables_initializer()
sess = tf.Session(config=tf_config)
sess.run(init_op)
K.set_session(sess)
return sess
def load_victim_model(number_classes, teacher_model=None, end2end=False):
for l in teacher_model.layers:
l.trainable = end2end
x = teacher_model.layers[-1].output
x = Dense(number_classes)(x)
x = Activation('softmax', name="act")(x)
model = Model(teacher_model.input, x)
opt = keras.optimizers.Adadelta()
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
return model
def init_gpu(gpu_index, force=False):
if isinstance(gpu_index, list):
gpu_num = ','.join([str(i) for i in gpu_index])
else:
gpu_num = str(gpu_index)
if "CUDA_VISIBLE_DEVICES" in os.environ and os.environ["CUDA_VISIBLE_DEVICES"] and not force:
print('GPU already initiated')
return
os.environ["CUDA_VISIBLE_DEVICES"] = gpu_num
sess = fix_gpu_memory()
return sess
def preprocess(X, method):
assert method in {'raw', 'imagenet', 'inception', 'mnist'}
if method is 'raw':
pass
elif method is 'imagenet':
X = imagenet_preprocessing(X)
else:
raise Exception('unknown method %s' % method)
return X
def reverse_preprocess(X, method):
assert method in {'raw', 'imagenet', 'inception', 'mnist'}
if method is 'raw':
pass
elif method is 'imagenet':
X = imagenet_reverse_preprocessing(X)
else:
raise Exception('unknown method %s' % method)
return X
def imagenet_preprocessing(x, data_format=None):
if data_format is None:
data_format = K.image_data_format()
assert data_format in ('channels_last', 'channels_first')
x = np.array(x)
if data_format == 'channels_first':
# 'RGB'->'BGR'
if x.ndim == 3:
x = x[::-1, ...]
else:
x = x[:, ::-1, ...]
else:
# 'RGB'->'BGR'
x = x[..., ::-1]
mean = [103.939, 116.779, 123.68]
std = None
# Zero-center by mean pixel
if data_format == 'channels_first':
if x.ndim == 3:
x[0, :, :] -= mean[0]
x[1, :, :] -= mean[1]
x[2, :, :] -= mean[2]
if std is not None:
x[0, :, :] /= std[0]
x[1, :, :] /= std[1]
x[2, :, :] /= std[2]
else:
x[:, 0, :, :] -= mean[0]
x[:, 1, :, :] -= mean[1]
x[:, 2, :, :] -= mean[2]
if std is not None:
x[:, 0, :, :] /= std[0]
x[:, 1, :, :] /= std[1]
x[:, 2, :, :] /= std[2]
else:
x[..., 0] -= mean[0]
x[..., 1] -= mean[1]
x[..., 2] -= mean[2]
if std is not None:
x[..., 0] /= std[0]
x[..., 1] /= std[1]
x[..., 2] /= std[2]
return x
def imagenet_reverse_preprocessing(x, data_format=None):
import keras.backend as K
x = np.array(x)
if data_format is None:
data_format = K.image_data_format()
assert data_format in ('channels_last', 'channels_first')
if data_format == 'channels_first':
if x.ndim == 3:
# Zero-center by mean pixel
x[0, :, :] += 103.939
x[1, :, :] += 116.779
x[2, :, :] += 123.68
# 'BGR'->'RGB'
x = x[::-1, :, :]
else:
x[:, 0, :, :] += 103.939
x[:, 1, :, :] += 116.779
x[:, 2, :, :] += 123.68
x = x[:, ::-1, :, :]
else:
# Zero-center by mean pixel
x[..., 0] += 103.939
x[..., 1] += 116.779
x[..., 2] += 123.68
# 'BGR'->'RGB'
x = x[..., ::-1]
return x
def build_bottleneck_model(model, cut_off):
bottleneck_model = Model(model.input, model.get_layer(cut_off).output)
bottleneck_model.compile(loss='categorical_crossentropy',
optimizer='adam',
metrics=['accuracy'])
return bottleneck_model
def load_extractor(name):
model = keras.models.load_model("../feature_extractors/{}.h5".format(name))
if hasattr(model.layers[-1], "activation") and model.layers[-1].activation == "softmax":
raise Exception(
"Given extractor's last layer is softmax, need to remove the top layers to make it into a feature extractor")
# if "extract" in name.split("/")[-1]:
# pass
# else:
# print("Convert a model to a feature extractor")
# model = build_bottleneck_model(model, model.layers[layer_idx].name)
# model.save(name + "extract")
# model = keras.models.load_model(name + "extract")
return model
def get_dataset_path(dataset):
if not os.path.exists("config.json"):
raise Exception("Please config the datasets before running protection code. See more in README and config.py.")
config = json.load(open("config.json", 'r'))
if dataset not in config:
raise Exception(
"Dataset {} does not exist, please download to data/ and add the path to this function... Abort".format(
dataset))
return config[dataset]['train_dir'], config[dataset]['test_dir'], config[dataset]['num_classes'], config[dataset][
'num_images']
def normalize(x):
return x / np.linalg.norm(x, axis=1, keepdims=True)
class CloakData(object):
def __init__(self, dataset, img_shape=(224, 224), protect_class=None):
self.dataset = dataset
self.img_shape = img_shape
self.train_data_dir, self.test_data_dir, self.number_classes, self.number_samples = get_dataset_path(dataset)
self.all_labels = sorted(list(os.listdir(self.train_data_dir)))
if protect_class:
self.protect_class = protect_class
else:
self.protect_class = random.choice(self.all_labels)
self.sybil_class = random.choice([l for l in self.all_labels if l != self.protect_class])
self.protect_train_X, self.protect_test_X = self.load_label_data(self.protect_class)
self.sybil_train_X, self.sybil_test_X = self.load_label_data(self.sybil_class)
self.cloaked_protect_train_X = None
self.cloaked_sybil_train_X = None
self.label2path_train, self.label2path_test, self.path2idx = self.build_data_mapping()
self.all_training_path = self.get_all_data_path(self.label2path_train)
self.all_test_path = self.get_all_data_path(self.label2path_test)
self.protect_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.protect_class))
self.sybil_class_path = self.get_class_image_files(os.path.join(self.train_data_dir, self.sybil_class))
print("Find {} protect images".format(len(self.protect_class_path)))
def get_class_image_files(self, path):
return [os.path.join(path, f) for f in os.listdir(path)]
def extractor_ls_predict(self, feature_extractors_ls, X):
feature_ls = []
for extractor in feature_extractors_ls:
cur_features = extractor.predict(X)
feature_ls.append(cur_features)
concated_feature_ls = np.concatenate(feature_ls, axis=1)
concated_feature_ls = normalize(concated_feature_ls)
return concated_feature_ls
def load_embeddings(self, feature_extractors_names):
dictionaries = []
for extractor_name in feature_extractors_names:
path2emb = pickle.load(open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb"))
dictionaries.append(path2emb)
merge_dict = {}
for k in dictionaries[0].keys():
cur_emb = [dic[k] for dic in dictionaries]
merge_dict[k] = np.concatenate(cur_emb)
return merge_dict
def select_target_label(self, feature_extractors_ls, feature_extractors_names, metric='l2'):
original_feature_x = self.extractor_ls_predict(feature_extractors_ls, self.protect_train_X)
path2emb = self.load_embeddings(feature_extractors_names)
items = list(path2emb.items())
paths = [p[0] for p in items]
embs = [p[1] for p in items]
embs = np.array(embs)
pair_dist = pairwise_distances(original_feature_x, embs, metric)
max_sum = np.min(pair_dist, axis=0)
sorted_idx = np.argsort(max_sum)[::-1]
highest_num = 0
paired_target_X = None
final_target_class_path = None
for idx in sorted_idx[:5]:
target_class_path = paths[idx]
cur_target_X = self.load_dir(target_class_path)
cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X])
cur_tot_sum, cur_paired_target_X = self.calculate_dist_score(self.protect_train_X, cur_target_X,
feature_extractors_ls,
metric=metric)
if cur_tot_sum > highest_num:
highest_num = cur_tot_sum
paired_target_X = cur_paired_target_X
final_target_class_path = target_class_path
np.random.shuffle(paired_target_X)
return final_target_class_path, paired_target_X
def calculate_dist_score(self, a, b, feature_extractors_ls, metric='l2'):
features1 = self.extractor_ls_predict(feature_extractors_ls, a)
features2 = self.extractor_ls_predict(feature_extractors_ls, b)
pair_cos = pairwise_distances(features1, features2, metric)
max_sum = np.min(pair_cos, axis=0)
max_sum_arg = np.argsort(max_sum)[::-1]
max_sum_arg = max_sum_arg[:len(a)]
max_sum = [max_sum[i] for i in max_sum_arg]
paired_target_X = [b[j] for j in max_sum_arg]
paired_target_X = np.array(paired_target_X)
return np.min(max_sum), paired_target_X
def get_all_data_path(self, label2path):
all_paths = []
for k, v in label2path.items():
cur_all_paths = [os.path.join(k, cur_p) for cur_p in v]
all_paths.extend(cur_all_paths)
return all_paths
def load_label_data(self, label):
train_label_path = os.path.join(self.train_data_dir, label)
test_label_path = os.path.join(self.test_data_dir, label)
train_X = self.load_dir(train_label_path)
test_X = self.load_dir(test_label_path)
return train_X, test_X
def load_dir(self, path):
assert os.path.exists(path)
x_ls = []
for file in os.listdir(path):
cur_path = os.path.join(path, file)
im = image.load_img(cur_path, target_size=self.img_shape)
im = image.img_to_array(im)
x_ls.append(im)
raw_x = np.array(x_ls)
return preprocess_input(raw_x)
def build_data_mapping(self):
label2path_train = {}
label2path_test = {}
idx = 0
path2idx = {}
for label_name in self.all_labels:
full_path_train = os.path.join(self.train_data_dir, label_name)
full_path_test = os.path.join(self.test_data_dir, label_name)
label2path_train[full_path_train] = list(os.listdir(full_path_train))
label2path_test[full_path_test] = list(os.listdir(full_path_test))
for img_file in os.listdir(full_path_train):
path2idx[os.path.join(full_path_train, img_file)] = idx
for img_file in os.listdir(full_path_test):
path2idx[os.path.join(full_path_test, img_file)] = idx
idx += 1
return label2path_train, label2path_test, path2idx
def generate_data_post_cloak(self, sybil=False):
assert self.cloaked_protect_train_X is not None
while True:
batch_X = []
batch_Y = []
cur_batch_path = random.sample(self.all_training_path, 32)
for p in cur_batch_path:
cur_y = self.path2idx[p]
if p in self.protect_class_path:
cur_x = random.choice(self.cloaked_protect_train_X)
elif sybil and (p in self.sybil_class):
cur_x = random.choice(self.cloaked_sybil_train_X)
else:
im = image.load_img(p, target_size=self.img_shape)
im = image.img_to_array(im)
cur_x = preprocess_input(im)
batch_X.append(cur_x)
batch_Y.append(cur_y)
batch_X = np.array(batch_X)
batch_Y = to_categorical(np.array(batch_Y), num_classes=self.number_classes)
yield batch_X, batch_Y