0.01

Former-commit-id: 268fb7e6825ddfc1165fa7adc7c216f9d61005da [formerly 06376993a831c060c337ec6e7540252f0b2dfe09] Former-commit-id: c4812d40187a76a878e7d215d22ee84811b41896
2026-06-12 21:50:46 +05:30 · 2020-07-01 21:16:03 -05:00
parent 3ba2abacf2
commit 889fd933e8
19 changed files with 1647 additions and 1099 deletions
@@ -0,0 +1 @@
+58d500da850206b845bdd0150fa182a0ff8c50f0
@@ -0,0 +1,80 @@
+import detect_face
+import numpy as np
+import tensorflow as tf
+
+# Make np.load default to allow_pickle=True (presumably needed for the pickled
+# weight dictionaries read by detect_face -- TODO confirm).
+# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
+np_load_old = np.load
+
+
+def _np_load_allow_pickle(*args, **kwargs):
+    """Call the original np.load with allow_pickle defaulting to True.
+
+    An allow_pickle value supplied by the caller takes precedence, so passing
+    it explicitly does not trigger a duplicate-keyword TypeError.
+    """
+    kwargs.setdefault('allow_pickle', True)
+    return np_load_old(*args, **kwargs)
+
+
+np.load = _np_load_allow_pickle
+
+
+def to_rgb(img):
+    """Expand a 2-D grayscale image into a three-channel uint8 image.
+
+    img: 2-D array; unpacking its shape raises ValueError for any other rank.
+    Returns a new (rows, cols, 3) uint8 array with img copied into each channel.
+    """
+    rows, cols = img.shape
+    rgb = np.empty((rows, cols, 3), dtype=np.uint8)
+    for channel in range(3):
+        rgb[:, :, channel] = img
+    return rgb
+
+
+def aligner(sess):
+    """Create the MTCNN detector for `sess` and return it as [pnet, rnet, onet].
+
+    The model path is passed as None, leaving the choice of weight location to
+    detect_face.create_mtcnn.
+    """
+    stage1, stage2, stage3 = detect_face.create_mtcnn(sess, None)
+    return [stage1, stage2, stage3]
+
+
+def align(orig_img, aligner, margin=0.8, detect_multiple_faces=True):
+    """Detect faces in an image and crop each one with a surrounding margin.
+
+    orig_img: image array; a 2-D input is expanded with to_rgb and only the
+        first three channels are used.
+    aligner: the [pnet, rnet, onet] sequence returned by aligner().
+    margin: fraction of the detected box width/height added around the box,
+        half on each side. It is divided by 1.5 when several faces are found.
+    detect_multiple_faces: if False and several faces are found, keep only the
+        box maximising area minus twice its squared distance to the image centre.
+
+    Returns (cropped_arr, bounding_boxes_arr): the cropped images and their
+    [x1, y1, x2, y2] boxes clipped to the image. Returns None when the input
+    has fewer than two dimensions or no face is detected.
+    """
+    pnet, rnet, onet = aligner
+    minsize = 20  # minimum size of face
+    threshold = [0.6, 0.7, 0.7]  # thresholds of the three detection stages
+    factor = 0.709  # scale factor
+
+    if orig_img.ndim < 2:
+        return None
+    if orig_img.ndim == 2:
+        orig_img = to_rgb(orig_img)
+    orig_img = orig_img[:, :, 0:3]
+
+    bounding_boxes, _ = detect_face.detect_face(orig_img, minsize, pnet, rnet, onet, threshold, factor)
+    nrof_faces = bounding_boxes.shape[0]
+    if nrof_faces == 0:
+        return None
+
+    det = bounding_boxes[:, 0:4]
+    img_size = np.asarray(orig_img.shape)[0:2]  # (height, width)
+    if nrof_faces > 1:
+        margin = margin / 1.5
+        if detect_multiple_faces:
+            det_arr = [np.squeeze(det[i]) for i in range(nrof_faces)]
+        else:
+            bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
+            img_center = img_size / 2
+            offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
+                                 (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
+            offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
+            index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)  # some extra weight on the centering
+            det_arr = [det[index, :]]
+    else:
+        det_arr = [np.squeeze(det)]
+
+    cropped_arr = []
+    bounding_boxes_arr = []
+    for face in det_arr:
+        face = np.squeeze(face)
+        bb = np.zeros(4, dtype=np.int32)
+        margin_w = int((face[2] - face[0]) * margin)
+        margin_h = int((face[3] - face[1]) * margin)
+
+        # x edges grow by the width margin, y edges by the height margin,
+        # and the result is clipped to the image bounds.
+        bb[0] = np.maximum(face[0] - margin_w / 2, 0)
+        bb[1] = np.maximum(face[1] - margin_h / 2, 0)
+        bb[2] = np.minimum(face[2] + margin_w / 2, img_size[1])
+        bb[3] = np.minimum(face[3] + margin_h / 2, img_size[0])
+        cropped_arr.append(orig_img[bb[1]:bb[3], bb[0]:bb[2], :])
+        bounding_boxes_arr.append([bb[0], bb[1], bb[2], bb[3]])
+    return cropped_arr, bounding_boxes_arr
+#
+# if __name__ == '__main__':
+#     orig_img = misc.imread('orig_img.jpeg')
+#     cropped_arr, bounding_boxes_arr = align(orig_img)
+#     misc.imsave('test_output.jpeg', cropped_arr[0])
+#     print(bounding_boxes_arr)
+#
@@ -0,0 +1,794 @@
+""" Tensorflow implementation of the face detection / alignment algorithm found at
+https://github.com/kpzhang93/MTCNN_face_detection_alignment
+"""
+# MIT License
+# 
+# Copyright (c) 2016 David Sandberg
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# 
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+# 
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+# from math import floor
+import cv2
+import numpy as np
+import tensorflow as tf
+from six import string_types, iteritems
+
+
+def layer(op):
+    """Decorator that turns an op method into a chainable network layer.
+
+    The wrapped method receives the network's current terminal node(s) as its
+    input, has its output stored in self.layers under the layer name and fed
+    as the next input, and returns self so calls can be chained.
+    """
+
+    def layer_decorated(self, *args, **kwargs):
+        # Fall back to an auto-generated name when the caller gave none.
+        name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
+        # A single terminal is passed as-is; several are passed as a list.
+        pending = len(self.terminals)
+        if pending == 0:
+            raise RuntimeError('No input variables found for layer %s.' % name)
+        layer_input = self.terminals[0] if pending == 1 else list(self.terminals)
+        # Run the op, register its output by name, then make it the next input.
+        layer_output = op(self, layer_input, *args, **kwargs)
+        self.layers[name] = layer_output
+        self.feed(layer_output)
+        return self
+
+    return layer_decorated
+
+
+class Network(object):
+    """Base class for networks assembled by chaining @layer-decorated methods.
+
+    Subclasses implement setup(), which is invoked from __init__ and builds the
+    graph by calling feed() followed by layer methods.
+    """
+
+    def __init__(self, inputs, trainable=True):
+        """Store the inputs and build the network via setup().
+
+        inputs: mapping from input names to input nodes; it seeds self.layers.
+        trainable: forwarded to tf.get_variable for every variable created.
+        """
+        # The input nodes for this network
+        self.inputs = inputs
+        # The current list of terminal nodes
+        self.terminals = []
+        # Mapping from layer names to layers
+        self.layers = dict(inputs)
+        # If true, the resulting variables are set as trainable
+        self.trainable = trainable
+
+        self.setup()
+
+    def setup(self):
+        """Construct the network. """
+        raise NotImplementedError('Must be implemented by the subclass.')
+
+    def load(self, data_path, session, ignore_missing=False):
+        """Load network weights.
+        data_path: The path to the numpy-serialized network weights
+        session: The current TensorFlow session
+        ignore_missing: If true, serialized weights for missing layers are ignored.
+        """
+        # The file holds a 0-d object array; .item() unwraps the mapping
+        # {op_name: {param_name: value}} that the loops below iterate over.
+        data_dict = np.load(data_path, encoding='latin1').item()  # pylint: disable=no-member
+
+        for op_name in data_dict:
+            # reuse=True: only look up variables that setup() already created.
+            with tf.variable_scope(op_name, reuse=True):
+                for param_name, data in iteritems(data_dict[op_name]):
+                    try:
+                        var = tf.get_variable(param_name)
+                        session.run(var.assign(data))
+                    except ValueError:
+                        # Re-raised unless the caller opted into ignore_missing.
+                        if not ignore_missing:
+                            raise
+
+    def feed(self, *args):
+        """Set the input(s) for the next operation by replacing the terminal nodes.
+        The arguments can be either layer names or the actual layers.
+        """
+        assert len(args) != 0
+        self.terminals = []
+        for fed_layer in args:
+            if isinstance(fed_layer, string_types):
+                # Names are resolved through the layer lookup table.
+                try:
+                    fed_layer = self.layers[fed_layer]
+                except KeyError:
+                    raise KeyError('Unknown layer name fed: %s' % fed_layer)
+            self.terminals.append(fed_layer)
+        return self
+
+    def get_output(self):
+        """Returns the current network output."""
+        return self.terminals[-1]
+
+    def get_unique_name(self, prefix):
+        """Returns an index-suffixed unique name for the given prefix.
+        This is used for auto-generating layer names based on the type-prefix.
+        """
+        # Count the registered layer names that already start with the prefix.
+        ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1
+        return '%s_%d' % (prefix, ident)
+
+    def make_var(self, name, shape):
+        """Creates a new TensorFlow variable."""
+        return tf.get_variable(name, shape, trainable=self.trainable)
+
+    def validate_padding(self, padding):
+        """Verifies that the padding is one of the supported ones."""
+        assert padding in ('SAME', 'VALID')
+
+    @layer
+    def conv(self,
+             inp,
+             k_h,
+             k_w,
+             c_o,
+             s_h,
+             s_w,
+             name,
+             relu=True,
+             padding='SAME',
+             group=1,
+             biased=True):
+        """2-D convolution layer with optional bias and ReLU.
+
+        k_h, k_w: kernel height and width.
+        c_o: number of output channels.
+        s_h, s_w: strides along height and width.
+        name: variable scope holding the 'weights' and 'biases' variables.
+        relu: apply tf.nn.relu to the result when true.
+        padding: 'SAME' or 'VALID'.
+        group: only checked for divisibility and used to size the kernel's
+            input-channel dimension; no grouped convolution is performed here.
+        biased: add a per-output-channel bias when true.
+        """
+        # Verify that the padding is acceptable
+        self.validate_padding(padding)
+        # Get the number of channels in the input
+        c_i = int(inp.get_shape()[-1])
+        # Verify that the grouping parameter is valid
+        assert c_i % group == 0
+        assert c_o % group == 0
+        # Convolution for a given input and kernel
+        convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
+        with tf.variable_scope(name) as scope:
+            kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o])
+            # This is the common-case. Convolve the input without any further complications.
+            output = convolve(inp, kernel)
+            # Add the biases
+            if biased:
+                biases = self.make_var('biases', [c_o])
+                output = tf.nn.bias_add(output, biases)
+            if relu:
+                # ReLU non-linearity
+                output = tf.nn.relu(output, name=scope.name)
+            return output
+
+    @layer
+    def prelu(self, inp, name):
+        """Parametric ReLU with one learnable 'alpha' per channel of the last axis."""
+        with tf.variable_scope(name):
+            i = int(inp.get_shape()[-1])
+            alpha = self.make_var('alpha', shape=(i,))
+            # Positive part passes through; negative part is scaled by alpha.
+            output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp))
+        return output
+
+    @layer
+    def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'):
+        """Max-pooling with a k_h x k_w window and s_h x s_w strides."""
+        self.validate_padding(padding)
+        return tf.nn.max_pool(inp,
+                              ksize=[1, k_h, k_w, 1],
+                              strides=[1, s_h, s_w, 1],
+                              padding=padding,
+                              name=name)
+
+    @layer
+    def fc(self, inp, num_out, name, relu=True):
+        """Fully connected layer with num_out outputs; 4-D inputs are flattened first."""
+        with tf.variable_scope(name):
+            input_shape = inp.get_shape()
+            if input_shape.ndims == 4:
+                # The input is spatial. Vectorize it first.
+                dim = 1
+                for d in input_shape[1:].as_list():
+                    dim *= int(d)
+                feed_in = tf.reshape(inp, [-1, dim])
+            else:
+                feed_in, dim = (inp, input_shape[-1].value)
+            weights = self.make_var('weights', shape=[dim, num_out])
+            biases = self.make_var('biases', [num_out])
+            # relu_layer applies ReLU to x*W + b; xw_plus_b leaves it linear.
+            op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
+            fc = op(feed_in, weights, biases, name=name)
+            return fc
+
+    # NOTE: the string below is a bare expression statement in the class body;
+    # it describes the softmax method that follows but is not its docstring.
+    """
+    Multi dimensional softmax,
+    refer to https://github.com/tensorflow/tensorflow/issues/210
+    compute softmax along the dimension of target
+    the native softmax only supports batch_size x dimension
+    """
+
+    @layer
+    def softmax(self, target, axis, name=None):
+        """Softmax of `target` along `axis`."""
+        # Subtract the per-axis maximum before exponentiating.
+        max_axis = tf.reduce_max(target, axis, keepdims=True)
+        target_exp = tf.exp(target - max_axis)
+        normalize = tf.reduce_sum(target_exp, axis, keepdims=True)
+        softmax = tf.div(target_exp, normalize, name)
+        return softmax
+
+
+class PNet(Network):
+    """Network with a shared conv/PReLU trunk and two 1x1-conv heads:
+    'prob1' (softmax over 2 channels) and 'conv4-2' (4 channels).
+    """
+
+    def setup(self):
+        """Build the trunk from 'data', then both heads on top of 'PReLU3'."""
+        # pylint: disable=no-value-for-parameter, no-member
+        # Trunk followed by the 'prob1' head.
+        self.feed('data')
+        self.conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1')
+        self.prelu(name='PReLU1')
+        self.max_pool(2, 2, 2, 2, name='pool1')
+        self.conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2')
+        self.prelu(name='PReLU2')
+        self.conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3')
+        self.prelu(name='PReLU3')
+        self.conv(1, 1, 2, 1, 1, relu=False, name='conv4-1')
+        self.softmax(3, name='prob1')
+
+        # Second head, branching off the last trunk activation.
+        self.feed('PReLU3')
+        self.conv(1, 1, 4, 1, 1, relu=False, name='conv4-2')
+
+
+class RNet(Network):
+    """Network with a conv/PReLU trunk, a 128-unit fully connected layer and
+    two heads: 'prob1' (softmax over 2 outputs) and 'conv5-2' (4 outputs).
+    """
+
+    def setup(self):
+        """Build the trunk from 'data', then both heads on top of 'prelu4'."""
+        # pylint: disable=no-value-for-parameter, no-member
+        # Trunk followed by the 'prob1' head.
+        self.feed('data')
+        self.conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1')
+        self.prelu(name='prelu1')
+        self.max_pool(3, 3, 2, 2, name='pool1')
+        self.conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2')
+        self.prelu(name='prelu2')
+        self.max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
+        self.conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3')
+        self.prelu(name='prelu3')
+        self.fc(128, relu=False, name='conv4')
+        self.prelu(name='prelu4')
+        self.fc(2, relu=False, name='conv5-1')
+        self.softmax(1, name='prob1')
+
+        # Second head, branching off the shared 128-unit activation.
+        self.feed('prelu4')
+        self.fc(4, relu=False, name='conv5-2')
+
+
+class ONet(Network):
+    """Network with a conv/PReLU trunk, a 256-unit fully connected layer and
+    three heads: 'prob1' (softmax over 2 outputs), 'conv6-2' (4 outputs) and
+    'conv6-3' (10 outputs).
+    """
+
+    def setup(self):
+        """Build the trunk from 'data', then the three heads on top of 'prelu5'."""
+        # pylint: disable=no-value-for-parameter, no-member
+        # Trunk followed by the 'prob1' head.
+        self.feed('data')
+        self.conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1')
+        self.prelu(name='prelu1')
+        self.max_pool(3, 3, 2, 2, name='pool1')
+        self.conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2')
+        self.prelu(name='prelu2')
+        self.max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
+        self.conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3')
+        self.prelu(name='prelu3')
+        self.max_pool(2, 2, 2, 2, name='pool3')
+        self.conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4')
+        self.prelu(name='prelu4')
+        self.fc(256, relu=False, name='conv5')
+        self.prelu(name='prelu5')
+        self.fc(2, relu=False, name='conv6-1')
+        self.softmax(1, name='prob1')
+
+        # Second head: 4 outputs.
+        self.feed('prelu5')
+        self.fc(4, relu=False, name='conv6-2')
+
+        # Third head: 10 outputs.
+        self.feed('prelu5')
+        self.fc(10, relu=False, name='conv6-3')
+
+
+def create_mtcnn(sess, model_path):
+    """Build the three detector networks in `sess` and load their weights.
+
+    sess: TensorFlow session used both to assign weights and to run inference.
+    model_path: directory containing 'weights/det1.npy', 'weights/det2.npy' and
+        'weights/det3.npy'; a falsy value selects the directory of this file.
+
+    Returns (pnet_fun, rnet_fun, onet_fun), each a callable that feeds an image
+    batch to the matching network's input and returns its output arrays.
+    """
+    if not model_path:
+        model_path = os.path.split(os.path.realpath(__file__))[0]
+
+    def _build(scope, net_cls, input_shape, weights_file):
+        # Create the placeholder and network in their own scope, then load weights.
+        with tf.variable_scope(scope):
+            data = tf.placeholder(tf.float32, input_shape, 'input')
+            network = net_cls({'data': data})
+            network.load(os.path.join(model_path, weights_file), sess)
+
+    _build('pnet', PNet, (None, None, None, 3), 'weights/det1.npy')
+    _build('rnet', RNet, (None, 24, 24, 3), 'weights/det2.npy')
+    _build('onet', ONet, (None, 48, 48, 3), 'weights/det3.npy')
+
+    def pnet_fun(img):
+        return sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0': img})
+
+    def rnet_fun(img):
+        return sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0': img})
+
+    def onet_fun(img):
+        return sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'),
+                        feed_dict={'onet/input:0': img})
+
+    return pnet_fun, rnet_fun, onet_fun
+
+
+def detect_face(img, minsize, pnet, rnet, onet, threshold, factor):
+    """Detects faces in an image, and returns bounding boxes and points for them.
+    img: input image, indexed as (row, column, channel)
+    minsize: minimum face size
+    pnet, rnet, onet: the three network callables returned by create_mtcnn
+    threshold: threshold=[th1, th2, th3], the score thresholds of the three stages
+    factor: the factor used to create a scaling pyramid of face sizes to detect in the image.
+
+    Returns (total_boxes, points). After the second/third stage total_boxes has
+    one row [x1, y1, x2, y2, score] per face; it stays an empty (0, 9) array
+    when the first stage proposes nothing. points holds, per surviving box, ten
+    values (rows 0-4 scaled by box width, rows 5-9 by box height), or is empty.
+    If a candidate crop has exactly one zero-sized dimension, an empty
+    (boxes, points) pair is returned.
+    """
+    factor_count = 0
+    total_boxes = np.empty((0, 9))
+    points = np.empty(0)
+    h = img.shape[0]
+    w = img.shape[1]
+    minl = np.amin([h, w])
+    m = 12.0 / minsize
+    minl = minl * m
+    # create scale pyramid: keep shrinking while the short side stays >= 12
+    scales = []
+    while minl >= 12:
+        scales += [m * np.power(factor, factor_count)]
+        minl = minl * factor
+        factor_count += 1
+
+    # first stage
+    for scale in scales:
+        hs = int(np.ceil(h * scale))
+        ws = int(np.ceil(w * scale))
+        im_data = imresample(img, (hs, ws))
+        # Map pixel values from [0, 255] to roughly [-1, 1].
+        im_data = (im_data - 127.5) * 0.0078125
+        img_x = np.expand_dims(im_data, 0)
+        # The networks are fed with the two spatial axes swapped; outputs are swapped back.
+        img_y = np.transpose(img_x, (0, 2, 1, 3))
+        out = pnet(img_y)
+        out0 = np.transpose(out[0], (0, 2, 1, 3))
+        out1 = np.transpose(out[1], (0, 2, 1, 3))
+
+        boxes, _ = generateBoundingBox(out1[0, :, :, 1].copy(), out0[0, :, :, :].copy(), scale, threshold[0])
+
+        # inter-scale nms
+        pick = nms(boxes.copy(), 0.5, 'Union')
+        if boxes.size > 0 and pick.size > 0:
+            boxes = boxes[pick, :]
+            total_boxes = np.append(total_boxes, boxes, axis=0)
+
+    numbox = total_boxes.shape[0]
+    if numbox > 0:
+        pick = nms(total_boxes.copy(), 0.7, 'Union')
+        total_boxes = total_boxes[pick, :]
+        # Apply the regression offsets stored in columns 5-8 to the corners.
+        regw = total_boxes[:, 2] - total_boxes[:, 0]
+        regh = total_boxes[:, 3] - total_boxes[:, 1]
+        qq1 = total_boxes[:, 0] + total_boxes[:, 5] * regw
+        qq2 = total_boxes[:, 1] + total_boxes[:, 6] * regh
+        qq3 = total_boxes[:, 2] + total_boxes[:, 7] * regw
+        qq4 = total_boxes[:, 3] + total_boxes[:, 8] * regh
+        total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:, 4]]))
+        total_boxes = rerec(total_boxes.copy())
+        total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4]).astype(np.int32)
+        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
+
+    numbox = total_boxes.shape[0]
+    if numbox > 0:
+        # second stage
+        tempimg = np.zeros((24, 24, 3, numbox))
+        for k in range(0, numbox):
+            tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
+            tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = img[y[k] - 1:ey[k], x[k] - 1:ex[k], :]
+            if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
+                tempimg[:, :, :, k] = imresample(tmp, (24, 24))
+            else:
+                # Degenerate crop: report no detections in the usual 2-tuple form.
+                return np.empty((0, 5)), np.empty(0)
+        tempimg = (tempimg - 127.5) * 0.0078125
+        tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))
+        out = rnet(tempimg1)
+        out0 = np.transpose(out[0])
+        out1 = np.transpose(out[1])
+        score = out1[1, :]
+        ipass = np.where(score > threshold[1])
+        total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])
+        mv = out0[:, ipass[0]]
+        if total_boxes.shape[0] > 0:
+            pick = nms(total_boxes, 0.7, 'Union')
+            total_boxes = total_boxes[pick, :]
+            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:, pick]))
+            total_boxes = rerec(total_boxes.copy())
+
+    numbox = total_boxes.shape[0]
+    if numbox > 0:
+        # third stage
+        total_boxes = np.fix(total_boxes).astype(np.int32)
+        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
+        tempimg = np.zeros((48, 48, 3, numbox))
+        for k in range(0, numbox):
+            tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
+            tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = img[y[k] - 1:ey[k], x[k] - 1:ex[k], :]
+            if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
+                tempimg[:, :, :, k] = imresample(tmp, (48, 48))
+            else:
+                # Degenerate crop: report no detections in the usual 2-tuple form.
+                return np.empty((0, 5)), np.empty(0)
+        tempimg = (tempimg - 127.5) * 0.0078125
+        tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))
+        out = onet(tempimg1)
+        out0 = np.transpose(out[0])
+        out1 = np.transpose(out[1])
+        out2 = np.transpose(out[2])
+        score = out2[1, :]
+        points = out1
+        ipass = np.where(score > threshold[2])
+        points = points[:, ipass[0]]
+        total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])
+        mv = out0[:, ipass[0]]
+
+        # Convert the points from box-relative fractions to image coordinates.
+        box_w = total_boxes[:, 2] - total_boxes[:, 0] + 1
+        box_h = total_boxes[:, 3] - total_boxes[:, 1] + 1
+        points[0:5, :] = np.tile(box_w, (5, 1)) * points[0:5, :] + np.tile(total_boxes[:, 0], (5, 1)) - 1
+        points[5:10, :] = np.tile(box_h, (5, 1)) * points[5:10, :] + np.tile(total_boxes[:, 1], (5, 1)) - 1
+        if total_boxes.shape[0] > 0:
+            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
+            pick = nms(total_boxes.copy(), 0.7, 'Min')
+            total_boxes = total_boxes[pick, :]
+            points = points[:, pick]
+
+    return total_boxes, points
+
+
+def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor):
+    """Detects faces in a list of images
+    images: list containing input images
+    detection_window_size_ratio: ratio of minimum face size to smallest image dimension
+    pnet, rnet, onet: the three network callables (see create_mtcnn)
+    threshold: threshold=[th1 th2 th3], the score thresholds of the three stages [0-1]
+    factor: the factor used to create a scaling pyramid of face sizes to detect in the image.
+
+    Returns a list with one entry per input image: a (total_boxes, points)
+    tuple, or None when no box of that image reaches or survives the third stage.
+    """
+    all_scales = [None] * len(images)
+    images_with_boxes = [None] * len(images)
+
+    # Per-image state; later stages add 'rnet_input' / 'onet_input' entries.
+    for i in range(len(images)):
+        images_with_boxes[i] = {'total_boxes': np.empty((0, 9))}
+
+    # create scale pyramid
+    for index, img in enumerate(images):
+        all_scales[index] = []
+        h = img.shape[0]
+        w = img.shape[1]
+        minsize = int(detection_window_size_ratio * np.minimum(w, h))
+        factor_count = 0
+        minl = np.amin([h, w])
+        # Never search for faces smaller than 12 pixels.
+        if minsize <= 12:
+            minsize = 12
+
+        m = 12.0 / minsize
+        minl = minl * m
+        while minl >= 12:
+            all_scales[index].append(m * np.power(factor, factor_count))
+            minl = minl * factor
+            factor_count += 1
+
+    # # # # # # # # # # # # #
+    # first stage - fast proposal network (pnet) to obtain face candidates
+    # # # # # # # # # # # # #
+
+    # Maps (width, height) of a resized image to the list of resized images
+    # having that resolution, so each resolution is sent to pnet as one batch.
+    images_obj_per_resolution = {}
+
+    # TODO: use some type of rounding to number module 8 to increase probability that pyramid images will have the same resolution across input images
+
+    for index, scales in enumerate(all_scales):
+        h = images[index].shape[0]
+        w = images[index].shape[1]
+
+        for scale in scales:
+            hs = int(np.ceil(h * scale))
+            ws = int(np.ceil(w * scale))
+
+            if (ws, hs) not in images_obj_per_resolution:
+                images_obj_per_resolution[(ws, hs)] = []
+
+            im_data = imresample(images[index], (hs, ws))
+            # Map pixel values from [0, 255] to roughly [-1, 1].
+            im_data = (im_data - 127.5) * 0.0078125
+            img_y = np.transpose(im_data, (1, 0, 2))  # caffe uses different dimensions ordering
+            images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index})
+
+    for resolution in images_obj_per_resolution:
+        images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]]
+        outs = pnet(images_per_resolution)
+
+        for index in range(len(outs[0])):
+            scale = images_obj_per_resolution[resolution][index]['scale']
+            image_index = images_obj_per_resolution[resolution][index]['index']
+            # Undo the axis swap applied before feeding pnet.
+            out0 = np.transpose(outs[0][index], (1, 0, 2))
+            out1 = np.transpose(outs[1][index], (1, 0, 2))
+
+            boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0])
+
+            # inter-scale nms
+            pick = nms(boxes.copy(), 0.5, 'Union')
+            if boxes.size > 0 and pick.size > 0:
+                boxes = boxes[pick, :]
+                images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'],
+                                                                          boxes,
+                                                                          axis=0)
+
+    # Merge the candidates of each image, refine them, and cut the 24x24 crops
+    # that form the second-stage input.
+    for index, image_obj in enumerate(images_with_boxes):
+        numbox = image_obj['total_boxes'].shape[0]
+        if numbox > 0:
+            h = images[index].shape[0]
+            w = images[index].shape[1]
+            pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union')
+            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
+            # Apply the regression offsets stored in columns 5-8 to the corners.
+            regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0]
+            regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1]
+            qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw
+            qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh
+            qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw
+            qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh
+            image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]]))
+            image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
+            image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32)
+            dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)
+
+            numbox = image_obj['total_boxes'].shape[0]
+            tempimg = np.zeros((24, 24, 3, numbox))
+
+            if numbox > 0:
+                for k in range(0, numbox):
+                    tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
+                    tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
+                    if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
+                        tempimg[:, :, :, k] = imresample(tmp, (24, 24))
+                    else:
+                        # NOTE(review): np.empty() is called without its required
+                        # shape argument, so this branch raises TypeError
+                        # instead of returning.
+                        return np.empty()
+
+                tempimg = (tempimg - 127.5) * 0.0078125
+                image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
+
+    # # # # # # # # # # # # #
+    # second stage - refinement of face candidates with rnet
+    # # # # # # # # # # # # #
+
+    # Concatenate the crops of all images so rnet runs once for the whole batch.
+    bulk_rnet_input = np.empty((0, 24, 24, 3))
+    for index, image_obj in enumerate(images_with_boxes):
+        if 'rnet_input' in image_obj:
+            bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0)
+
+    out = rnet(bulk_rnet_input)
+    out0 = np.transpose(out[0])
+    out1 = np.transpose(out[1])
+    score = out1[1, :]
+
+    # i is the offset of the current image's first crop inside the bulk output.
+    i = 0
+    for index, image_obj in enumerate(images_with_boxes):
+        if 'rnet_input' not in image_obj:
+            continue
+
+        rnet_input_count = image_obj['rnet_input'].shape[0]
+        score_per_image = score[i:i + rnet_input_count]
+        out0_per_image = out0[:, i:i + rnet_input_count]
+
+        # Keep only the candidates whose score exceeds the second threshold.
+        ipass = np.where(score_per_image > threshold[1])
+        image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
+                                              np.expand_dims(score_per_image[ipass].copy(), 1)])
+
+        mv = out0_per_image[:, ipass[0]]
+
+        if image_obj['total_boxes'].shape[0] > 0:
+            h = images[index].shape[0]
+            w = images[index].shape[1]
+            pick = nms(image_obj['total_boxes'], 0.7, 'Union')
+            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
+            image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick]))
+            image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
+
+            numbox = image_obj['total_boxes'].shape[0]
+
+            if numbox > 0:
+                # Cut the 48x48 crops that form the third-stage input.
+                tempimg = np.zeros((48, 48, 3, numbox))
+                image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32)
+                dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)
+
+                for k in range(0, numbox):
+                    tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
+                    tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
+                    if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
+                        tempimg[:, :, :, k] = imresample(tmp, (48, 48))
+                    else:
+                        # NOTE(review): np.empty() without a shape raises
+                        # TypeError (same issue as in the first stage).
+                        return np.empty()
+                tempimg = (tempimg - 127.5) * 0.0078125
+                image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
+
+        i += rnet_input_count
+
+    # # # # # # # # # # # # #
+    # third stage - further refinement and facial landmarks positions with onet
+    # # # # # # # # # # # # #
+
+    # Concatenate the crops of all images so onet runs once for the whole batch.
+    bulk_onet_input = np.empty((0, 48, 48, 3))
+    for index, image_obj in enumerate(images_with_boxes):
+        if 'onet_input' in image_obj:
+            bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0)
+
+    out = onet(bulk_onet_input)
+
+    out0 = np.transpose(out[0])
+    out1 = np.transpose(out[1])
+    out2 = np.transpose(out[2])
+    score = out2[1, :]
+    points = out1
+
+    # i is the offset of the current image's first crop inside the bulk output.
+    i = 0
+    ret = []
+    for index, image_obj in enumerate(images_with_boxes):
+        if 'onet_input' not in image_obj:
+            ret.append(None)
+            continue
+
+        onet_input_count = image_obj['onet_input'].shape[0]
+
+        out0_per_image = out0[:, i:i + onet_input_count]
+        score_per_image = score[i:i + onet_input_count]
+        points_per_image = points[:, i:i + onet_input_count]
+
+        # Keep only the candidates whose score exceeds the third threshold.
+        ipass = np.where(score_per_image > threshold[2])
+        points_per_image = points_per_image[:, ipass[0]]
+
+        image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
+                                              np.expand_dims(score_per_image[ipass].copy(), 1)])
+        mv = out0_per_image[:, ipass[0]]
+
+        # Convert the points from box-relative fractions to image coordinates.
+        # Note: w and h now hold per-box sizes, no longer the image size.
+        w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1
+        h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1
+        points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile(
+            image_obj['total_boxes'][:, 0], (5, 1)) - 1
+        points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile(
+            image_obj['total_boxes'][:, 1], (5, 1)) - 1
+
+        if image_obj['total_boxes'].shape[0] > 0:
+            image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv))
+            pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min')
+            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
+            points_per_image = points_per_image[:, pick]
+
+            ret.append((image_obj['total_boxes'], points_per_image))
+        else:
+            ret.append(None)
+
+        i += onet_input_count
+
+    return ret
+
+
+# function [boundingbox] = bbreg(boundingbox,reg)
+def bbreg(boundingbox, reg):
+    """Calibrate bounding boxes.
+
+    Shifts each corner coordinate in columns 0-3 of `boundingbox` by the
+    matching column of `reg` times the box width (x columns) or height
+    (y columns). `boundingbox` is modified in place and returned.
+    """
+    if reg.shape[1] == 1:
+        reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))
+
+    box_w = boundingbox[:, 2] - boundingbox[:, 0] + 1
+    box_h = boundingbox[:, 3] - boundingbox[:, 1] + 1
+    # Columns 0 and 2 scale with the width, columns 1 and 3 with the height.
+    extents = (box_w, box_h, box_w, box_h)
+    shifted = [boundingbox[:, col] + reg[:, col] * extents[col] for col in range(4)]
+    boundingbox[:, 0:4] = np.vstack(shifted).T
+    return boundingbox
+
+
+def generateBoundingBox(imap, reg, scale, t):
+    """Turn a score heatmap into candidate boxes at the given scale.
+
+    Args:
+        imap: 2-D score map.
+        reg: 3-D array whose last axis holds four regression offsets per cell.
+        scale: factor the image was resized by; cell coordinates are divided by it.
+        t: score threshold; cells with ``imap >= t`` become candidates.
+
+    Returns:
+        ``(boundingbox, reg)`` where ``boundingbox`` stacks, per candidate, the
+        two corner pairs, the score and the offsets, and ``reg`` holds the
+        offsets alone. With no candidates ``reg`` is an empty ``(0, 3)`` array.
+    """
+    stride = 2
+    cellsize = 12
+
+    heat = np.transpose(imap)
+    offset_maps = [np.transpose(reg[:, :, channel]) for channel in range(4)]
+    y, x = np.where(heat >= t)
+    if y.shape[0] == 1:
+        # Single-candidate case: the offset maps are flipped vertically,
+        # exactly as the original implementation does.
+        offset_maps = [np.flipud(offsets) for offsets in offset_maps]
+    score = heat[(y, x)]
+    reg = np.transpose(np.vstack([offsets[(y, x)] for offsets in offset_maps]))
+    if reg.size == 0:
+        reg = np.empty((0, 3))
+    cells = np.transpose(np.vstack([y, x]))
+    top_left = np.fix((stride * cells + 1) / scale)
+    bottom_right = np.fix((stride * cells + cellsize - 1 + 1) / scale)
+    boundingbox = np.hstack([top_left, bottom_right, np.expand_dims(score, 1), reg])
+    return boundingbox, reg
+
+
+# function pick = nms(boxes,threshold,type)
+def nms(boxes, threshold, method):
+    """Greedy non-maximum suppression over scored boxes.
+
+    Args:
+        boxes: 2-D array; columns 0-3 are x1, y1, x2, y2 and column 4 is the score.
+        threshold: a candidate is dropped when its overlap ratio with an
+            already kept box is greater than this value.
+        method: ``'Min'`` divides the intersection by the smaller of the two
+            areas; any other value uses intersection over union.
+
+    Returns:
+        1-D integer array of kept row indices, highest score first. For an
+        empty ``boxes`` the historical ``np.empty((0, 3))`` placeholder is
+        returned, so callers should check ``.size`` before indexing with it.
+    """
+    if boxes.size == 0:
+        return np.empty((0, 3))
+    x1 = boxes[:, 0]
+    y1 = boxes[:, 1]
+    x2 = boxes[:, 2]
+    y2 = boxes[:, 3]
+    s = boxes[:, 4]
+    area = (x2 - x1 + 1) * (y2 - y1 + 1)
+    order = np.argsort(s)
+    # np.intp is the native index type; the previous int16 buffer wrapped
+    # around once more than 32767 boxes were passed in.
+    pick = np.zeros(s.shape, dtype=np.intp)
+    # Compare by value: identity comparison against a string literal only
+    # works when the interpreter happens to intern both strings.
+    use_min = method == 'Min'
+    counter = 0
+    while order.size > 0:
+        i = order[-1]
+        pick[counter] = i
+        counter += 1
+        idx = order[0:-1]
+        xx1 = np.maximum(x1[i], x1[idx])
+        yy1 = np.maximum(y1[i], y1[idx])
+        xx2 = np.minimum(x2[i], x2[idx])
+        yy2 = np.minimum(y2[i], y2[idx])
+        w = np.maximum(0.0, xx2 - xx1 + 1)
+        h = np.maximum(0.0, yy2 - yy1 + 1)
+        inter = w * h
+        if use_min:
+            o = inter / np.minimum(area[i], area[idx])
+        else:
+            o = inter / (area[i] + area[idx] - inter)
+        # o has one entry per remaining candidate, so these positions also
+        # index the front of ``order`` (the current best sits at the end).
+        order = order[np.where(o <= threshold)]
+    pick = pick[0:counter]
+    return pick
+
+
+# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
+def pad(total_boxes, w, h):
+    """Clip boxes to a ``w`` x ``h`` image and compute the matching patch extents.
+
+    Args:
+        total_boxes: 2-D array; columns 0-3 are read as x1, y1, x2, y2.
+        w: image width used as the upper bound for x coordinates.
+        h: image height used as the upper bound for y coordinates.
+
+    Returns:
+        ``(dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph)`` as int32 arrays:
+        ``x``/``y``/``ex``/``ey`` are the clipped image coordinates,
+        ``dx``/``dy``/``edx``/``edy`` the corresponding start/end positions
+        inside each box-sized patch, and ``tmpw``/``tmph`` the patch sizes.
+        NOTE(review): the clamp to 1 suggests 1-based coordinates carried
+        over from the MATLAB original - confirm against callers.
+    """
+    numbox = total_boxes.shape[0]
+    tmpw = (total_boxes[:, 2] - total_boxes[:, 0] + 1).astype(np.int32)
+    tmph = (total_boxes[:, 3] - total_boxes[:, 1] + 1).astype(np.int32)
+
+    # By default the whole patch is filled: start at 1, end at the patch size.
+    dx = np.ones(numbox, dtype=np.int32)
+    dy = np.ones(numbox, dtype=np.int32)
+    edx = tmpw.copy()
+    edy = tmph.copy()
+
+    x, y, ex, ey = (total_boxes[:, col].astype(np.int32) for col in range(4))
+
+    past_right = ex > w
+    edx[past_right] = -ex[past_right] + w + tmpw[past_right]
+    ex[past_right] = w
+
+    past_bottom = ey > h
+    edy[past_bottom] = -ey[past_bottom] + h + tmph[past_bottom]
+    ey[past_bottom] = h
+
+    before_left = x < 1
+    dx[before_left] = 2 - x[before_left]
+    x[before_left] = 1
+
+    before_top = y < 1
+    dy[before_top] = 2 - y[before_top]
+    y[before_top] = 1
+
+    return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph
+
+
+# function [bboxA] = rerec(bboxA)
+def rerec(bboxA):
+    """Grow each box in place to a square that keeps the same centre.
+
+    Columns 0-3 of ``bboxA`` are read as x1, y1, x2, y2. The side of the
+    square is the longer of the box's width and height. The input array is
+    modified and returned.
+    """
+    height = bboxA[:, 3] - bboxA[:, 1]
+    width = bboxA[:, 2] - bboxA[:, 0]
+    side = np.maximum(width, height)
+    bboxA[:, 0] = bboxA[:, 0] + width * 0.5 - side * 0.5
+    bboxA[:, 1] = bboxA[:, 1] + height * 0.5 - side * 0.5
+    # Broadcasting the side length over both columns replaces the tile/transpose.
+    bboxA[:, 2:4] = bboxA[:, 0:2] + side[:, np.newaxis]
+    return bboxA
+
+
+def imresample(img, sz):
+    """Resize ``img`` to ``sz`` given as (height, width) using area interpolation."""
+    target_size = (sz[1], sz[0])  # cv2.resize takes (width, height)
+    return cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)  # @UndefinedVariable
+
+    # The loop-based implementation below is kept for debugging purposes.
+#     h=img.shape[0]
+#     w=img.shape[1]
+#     hs, ws = sz
+#     dx = float(w) / ws
+#     dy = float(h) / hs
+#     im_data = np.zeros((hs,ws,3))
+#     for a1 in range(0,hs):
+#         for a2 in range(0,ws):
+#             for a3 in range(0,3):
+#                 im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3]
+#     return im_data
@@ -47,7 +47,7 @@ class FawkesMaskGeneration:
                 max_iterations=MAX_ITERATIONS, initial_const=INITIAL_CONST,
                 intensity_range=INTENSITY_RANGE, l_threshold=L_THRESHOLD,
                 max_val=MAX_VAL, keep_final=KEEP_FINAL, maximize=MAXIMIZE, image_shape=IMAGE_SHAPE,
-                 verbose=0, ratio=RATIO, limit_dist=LIMIT_DIST):
+                 verbose=0, ratio=RATIO, limit_dist=LIMIT_DIST, faces=None):

        assert intensity_range in {'raw', 'imagenet', 'inception', 'mnist'}

@@ -69,10 +69,12 @@ class FawkesMaskGeneration:
        self.ratio = ratio
        self.limit_dist = limit_dist
        self.single_shape = list(image_shape)
+        self.faces = faces

        self.input_shape = tuple([self.batch_size] + self.single_shape)

        self.bottleneck_shape = tuple([self.batch_size] + self.single_shape)
+        # self.bottleneck_shape = tuple([self.batch_size, bottleneck_model_ls[0].output_shape[-1]])

        # the variable we're going to optimize over
        self.modifier = tf.Variable(np.zeros(self.input_shape, dtype=np.float32))
@@ -149,8 +151,6 @@ class FawkesMaskGeneration:
                     self.dist_raw,
                     tf.zeros_like(self.dist_raw)))
        self.dist_sum = tf.reduce_sum(tf.where(self.mask, self.dist, tf.zeros_like(self.dist)))
-        # self.dist_sum = 1e-5 * tf.reduce_sum(self.dist)
-        # self.dist_raw_sum = self.dist_sum

        def resize_tensor(input_tensor, model_input_shape):
            if input_tensor.shape[1:] == model_input_shape or model_input_shape[1] is None:
@@ -171,16 +171,14 @@ class FawkesMaskGeneration:

            self.bottleneck_a = bottleneck_model(cur_aimg_input)
            if self.MIMIC_IMG:
-                # cur_timg_input = resize_tensor(self.timg_input, model_input_shape)
-                # cur_simg_input = resize_tensor(self.simg_input, model_input_shape)
                cur_timg_input = self.timg_input
                cur_simg_input = self.simg_input
                self.bottleneck_t = calculate_direction(bottleneck_model, cur_timg_input, cur_simg_input)
-                # self.bottleneck_t = bottleneck_model(cur_timg_input)
            else:
                self.bottleneck_t = self.bottleneck_t_raw

            bottleneck_diff = self.bottleneck_t - self.bottleneck_a
+
            scale_factor = tf.sqrt(tf.reduce_sum(tf.square(self.bottleneck_t), axis=1))

            cur_bottlesim = tf.sqrt(tf.reduce_sum(tf.square(bottleneck_diff), axis=1))
@@ -189,7 +187,6 @@ class FawkesMaskGeneration:

            self.bottlesim += cur_bottlesim

-            # self.bottlesim_push += cur_bottlesim_push_sum
            self.bottlesim_sum += cur_bottlesim_sum

        # sum up the losses
@@ -202,20 +199,13 @@ class FawkesMaskGeneration:
                                               self.loss,
                                               tf.zeros_like(self.loss)))

-        # self.loss_sum = self.dist_sum + tf.reduce_sum(self.bottlesim)
-        # import pdb
-        # pdb.set_trace()
-        # self.loss_sum = tf.reduce_sum(tf.where(self.mask, self.loss, tf.zeros_like(self.loss)))
-
-        # Setup the Adadelta optimizer and keep track of variables
-        # we're creating
        start_vars = set(x.name for x in tf.global_variables())
        self.learning_rate_holder = tf.placeholder(tf.float32, shape=[])
+
        optimizer = tf.train.AdadeltaOptimizer(self.learning_rate_holder)
        # optimizer = tf.train.AdamOptimizer(self.learning_rate_holder)

-        self.train = optimizer.minimize(self.loss_sum,
-                                        var_list=[self.modifier])
+        self.train = optimizer.minimize(self.loss_sum, var_list=[self.modifier])
        end_vars = tf.global_variables()
        new_vars = [x for x in end_vars if x.name not in start_vars]

@@ -297,6 +287,7 @@ class FawkesMaskGeneration:
        LR = self.learning_rate
        nb_imgs = source_imgs.shape[0]
        mask = [True] * nb_imgs + [False] * (self.batch_size - nb_imgs)
+        # mask = [True] * self.batch_size
        mask = np.array(mask, dtype=np.bool)

        source_imgs = np.array(source_imgs)
@@ -317,19 +308,34 @@ class FawkesMaskGeneration:
            timg_tanh_batch = np.zeros(self.input_shape)
        else:
            timg_tanh_batch = np.zeros(self.bottleneck_shape)
+
        weights_batch = np.zeros(self.bottleneck_shape)
        simg_tanh_batch[:nb_imgs] = simg_tanh[:nb_imgs]
        timg_tanh_batch[:nb_imgs] = timg_tanh[:nb_imgs]
        weights_batch[:nb_imgs] = weights[:nb_imgs]
        modifier_batch = np.ones(self.input_shape) * 1e-6

-        self.sess.run(self.setup,
-                      {self.assign_timg_tanh: timg_tanh_batch,
-                       self.assign_simg_tanh: simg_tanh_batch,
-                       self.assign_const: CONST,
-                       self.assign_mask: mask,
-                       self.assign_weights: weights_batch,
-                       self.assign_modifier: modifier_batch})
+        temp_images = []
+
+        # set the variables so that we don't have to send them over again
+        if self.MIMIC_IMG:
+            self.sess.run(self.setup,
+                          {self.assign_timg_tanh: timg_tanh_batch,
+                           self.assign_simg_tanh: simg_tanh_batch,
+                           self.assign_const: CONST,
+                           self.assign_mask: mask,
+                           self.assign_weights: weights_batch,
+                           self.assign_modifier: modifier_batch})
+        else:
+            # if directly mimicking a vector, use assign_bottleneck_t_raw
+            # in setup
+            self.sess.run(self.setup,
+                          {self.assign_bottleneck_t_raw: timg_tanh_batch,
+                           self.assign_simg_tanh: simg_tanh_batch,
+                           self.assign_const: CONST,
+                           self.assign_mask: mask,
+                           self.assign_weights: weights_batch,
+                           self.assign_modifier: modifier_batch})

        best_bottlesim = [0] * nb_imgs if self.maximize else [np.inf] * nb_imgs
        best_adv = np.zeros_like(source_imgs)
@@ -347,6 +353,7 @@ class FawkesMaskGeneration:
                     dist_raw_sum,
                     bottlesim_sum / nb_imgs))

+        finished_idx = set()
        try:
            total_distance = [0] * nb_imgs

@@ -369,8 +376,14 @@ class FawkesMaskGeneration:
                    [self.dist_raw,
                     self.bottlesim,
                     self.aimg_input])
+
+                all_clear = True
                for e, (dist_raw, bottlesim, aimg_input) in enumerate(
                        zip(dist_raw_list, bottlesim_list, aimg_input_list)):
+
+                    if e in finished_idx:
+                        continue
+
                    if e >= nb_imgs:
                        break
                    if (bottlesim < best_bottlesim[e] and bottlesim > total_distance[e] * 0.1 and (
@@ -379,40 +392,55 @@ class FawkesMaskGeneration:
                        best_bottlesim[e] = bottlesim
                        best_adv[e] = aimg_input

-                if iteration != 0 and iteration % (self.MAX_ITERATIONS // 3) == 0:
-                    # LR = LR / 2
+                    # if iteration > 20 and (dist_raw >= self.l_threshold or iteration == self.MAX_ITERATIONS - 1):
+                    #     finished_idx.add(e)
+                    #     print("{} finished at dist {}".format(e, dist_raw))
+                    #     best_bottlesim[e] = bottlesim
+                    #     best_adv[e] = aimg_input
+                    #
+                    all_clear = False
+
+                if all_clear:
+                    break
+
+                if iteration != 0 and iteration % (self.MAX_ITERATIONS // 2) == 0:
+                    LR = LR / 2
                    print("Learning Rate: ", LR)

-                if iteration % (self.MAX_ITERATIONS // 10) == 0:
+                if iteration % (self.MAX_ITERATIONS // 5) == 0:
                    if self.verbose == 1:
-                        loss_sum = float(self.sess.run(self.loss_sum))
-                        dist_sum = float(self.sess.run(self.dist_sum))
-                        thresh_over = (dist_sum /
-                                       self.batch_size /
-                                       self.l_threshold *
-                                       100)
                        dist_raw_sum = float(self.sess.run(self.dist_raw_sum))
                        bottlesim_sum = self.sess.run(self.bottlesim_sum)
-                        print('ITER %4d: Total loss: %.4E; perturb: %.6f (%.2f%% over, raw: %.6f); sim: %f'
-                              % (iteration,
-                                 Decimal(loss_sum),
-                                 dist_sum,
-                                 thresh_over,
-                                 dist_raw_sum,
-                                 bottlesim_sum / nb_imgs))
+                        print('ITER %4d perturb: %.5f; sim: %f'
+                              % (iteration, dist_raw_sum / nb_imgs, bottlesim_sum / nb_imgs))
+
+                        # protected_images = aimg_input_list
+                        #
+                        # orginal_images = np.copy(self.faces.cropped_faces)
+                        # cloak_perturbation = reverse_process_cloaked(protected_images) - reverse_process_cloaked(
+                        #     orginal_images)
+                        # final_images = self.faces.merge_faces(cloak_perturbation)
+                        #
+                        # for p_img, img in zip(protected_images, final_images):
+                        #     dump_image(reverse_process_cloaked(p_img),
+                        #                "/home/shansixioing/fawkes/data/emily/emily_cloaked_cropped{}.png".format(iteration),
+                        #                format='png')
+                        #
+                        #     dump_image(img,
+                        #                "/home/shansixioing/fawkes/data/emily/emily_cloaked_{}.png".format(iteration),
+                        #                format='png')
+
        except KeyboardInterrupt:
            pass

        if self.verbose == 1:
            loss_sum = float(self.sess.run(self.loss_sum))
            dist_sum = float(self.sess.run(self.dist_sum))
-            thresh_over = (dist_sum / self.batch_size / self.l_threshold * 100)
            dist_raw_sum = float(self.sess.run(self.dist_raw_sum))
            bottlesim_sum = float(self.sess.run(self.bottlesim_sum))
-            print('END:       Total loss: %.4E; perturb: %.6f (%.2f%% over, raw: %.6f); sim: %f'
+            print('END:       Total loss: %.4E; perturb: %.6f (raw: %.6f); sim: %f'
                  % (Decimal(loss_sum),
                     dist_sum,
-                     thresh_over,
                     dist_raw_sum,
                     bottlesim_sum / nb_imgs))

@@ -0,0 +1 @@
+837da51fc1cd7e21f6989badd07c3ccec543833e
@@ -6,21 +6,16 @@ import sys

 import numpy as np
 from differentiator import FawkesMaskGeneration
-from keras.applications.vgg16 import preprocess_input
-from keras.preprocessing import image
-from skimage.transform import resize
-from tensorflow import set_random_seed
-from utils import load_extractor, init_gpu, select_target_label, dump_image, reverse_process_cloaked
+from utils import load_extractor, init_gpu, select_target_label, dump_image, reverse_process_cloaked, \
+    Faces

 random.seed(12243)
 np.random.seed(122412)
-set_random_seed(12242)

-BATCH_SIZE = 1
-MAX_ITER = 1000
+BATCH_SIZE = 10


-def generate_cloak_images(sess, feature_extractors, image_X, target_X=None, th=0.01):
+def generate_cloak_images(sess, feature_extractors, image_X, target_emb=None, th=0.01, faces=None):
    batch_size = BATCH_SIZE if len(image_X) > BATCH_SIZE else len(image_X)

    differentiator = FawkesMaskGeneration(sess, feature_extractors,
@@ -29,92 +24,117 @@ def generate_cloak_images(sess, feature_extractors, image_X, target_X=None, th=0
                                          intensity_range='imagenet',
                                          initial_const=args.sd,
                                          learning_rate=args.lr,
-                                          max_iterations=MAX_ITER,
+                                          max_iterations=args.max_step,
                                          l_threshold=th,
-                                          verbose=1, maximize=False, keep_final=False, image_shape=image_X.shape[1:])
+                                          verbose=1, maximize=False, keep_final=False, image_shape=image_X.shape[1:],
+                                          faces=faces)

-    cloaked_image_X = differentiator.attack(image_X, target_X)
+    cloaked_image_X = differentiator.attack(image_X, target_emb)
    return cloaked_image_X


 def get_mode_config(mode):
    if mode == 'low':
        args.feature_extractor = "low_extract"
+        # args.th = 0.003
        args.th = 0.001
    elif mode == 'mid':
        args.feature_extractor = "mid_extract"
-        args.th = 0.001
+        args.th = 0.004
    elif mode == 'high':
        args.feature_extractor = "high_extract"
-        args.th = 0.005
+        args.th = 0.004
    elif mode == 'ultra':
        args.feature_extractor = "high_extract"
-        args.th = 0.007
+        args.th = 0.03
    elif mode == 'custom':
        pass
    else:
        raise Exception("mode must be one of 'low', 'mid', 'high', 'ultra', 'custom'")


-def extract_faces(img):
-    #  wait on Huiying
-    return preprocess_input(resize(img, (224, 224)))
+def check_imgs(imgs):
+    """Bring pixel values onto the 0-255 scale.
+
+    Args:
+        imgs: array of pixel values, either all within [0, 1] or all within
+            [0, 255].
+
+    Returns:
+        ``imgs * 255.0`` when every value lies in [0, 1]; ``imgs`` unchanged
+        when every value lies in [0, 255].
+
+    Raises:
+        ValueError: if the values fit neither range.
+    """
+    lowest, highest = np.min(imgs), np.max(imgs)
+    if lowest >= 0 and highest <= 1:
+        return imgs * 255.0
+    if lowest >= 0 and highest <= 255:
+        return imgs
+    # The previous message ("Image values ") was cut off mid-sentence and
+    # gave no hint about what was wrong.
+    raise ValueError(
+        "Image values must lie within [0, 1] or [0, 255], got range [{}, {}]".format(lowest, highest))


 def fawkes():
+    assert args.format in ['png', 'jpg', 'jpeg']
+    if args.format == 'jpg':
+        args.format = 'jpeg'
    get_mode_config(args.mode)

    sess = init_gpu(args.gpu)
-    feature_extractors_ls = [load_extractor(args.feature_extractor)]
+    # feature_extractors_ls = [load_extractor(args.feature_extractor)]
+    # fs_names = ['mid_extract', 'high_extract']
+    fs_names = [args.feature_extractor]
+    feature_extractors_ls = [load_extractor(name) for name in fs_names]

    image_paths = glob.glob(os.path.join(args.directory, "*"))
    image_paths = [path for path in image_paths if "_cloaked" not in path.split("/")[-1]]

-    orginal_images = [extract_faces(image.img_to_array(image.load_img(cur_path))) for cur_path in
-                      image_paths]
+    faces = Faces(image_paths, sess)

+    orginal_images = faces.cropped_faces
    orginal_images = np.array(orginal_images)

-    if args.seperate_target:
-        target_images = []
+    if args.separate_target:
+        target_embedding = []
        for org_img in orginal_images:
            org_img = org_img.reshape([1] + list(org_img.shape))
-            tar_img = select_target_label(org_img, feature_extractors_ls, [args.feature_extractor])
-            target_images.append(tar_img)
-        target_images = np.concatenate(target_images)
+            tar_emb = select_target_label(org_img, feature_extractors_ls, fs_names)
+            target_embedding.append(tar_emb)
+        target_embedding = np.concatenate(target_embedding)
    else:
-        target_images = select_target_label(orginal_images, feature_extractors_ls, [args.feature_extractor])
+        target_embedding = select_target_label(orginal_images, feature_extractors_ls, fs_names)

    protected_images = generate_cloak_images(sess, feature_extractors_ls, orginal_images,
-                                             target_X=target_images, th=args.th)
+                                             target_emb=target_embedding, th=args.th, faces=faces)

-    for p_img, path in zip(protected_images, image_paths):
-        p_img = reverse_process_cloaked(p_img)
-        file_name = "{}_cloaked.jpeg".format(".".join(path.split(".")[:-1]))
-        dump_image(p_img, file_name, format="JPEG")
+    faces.cloaked_cropped_faces = protected_images
+
+    cloak_perturbation = reverse_process_cloaked(protected_images) - reverse_process_cloaked(orginal_images)
+    final_images = faces.merge_faces(cloak_perturbation)
+
+    for p_img, cloaked_img, path in zip(final_images, protected_images, image_paths):
+        file_name = "{}_{}_{}_{}_cloaked.{}".format(".".join(path.split(".")[:-1]), args.mode, args.th,
+                                                     args.feature_extractor, args.format)
+        dump_image(p_img, file_name, format=args.format)
+        #
+        # file_name = "{}_{}_{}_{}_cloaked_cropped.png".format(".".join(path.split(".")[:-1]), args.mode, args.th,
+        #                                                      args.feature_extractor)
+        # dump_image(reverse_process_cloaked(cloaked_img), file_name, format="png")


 def parse_arguments(argv):
    parser = argparse.ArgumentParser()
-    parser.add_argument('--directory', type=str,
+    parser.add_argument('--directory', '-d', type=str,
                        help='directory that contain images for cloaking', default='imgs/')

    parser.add_argument('--gpu', type=str,
                        help='GPU id', default='0')

    parser.add_argument('--mode', type=str,
-                        help='cloak generation mode', default='mid')
+                        help='cloak generation mode', default='high')
    parser.add_argument('--feature-extractor', type=str,
                        help="name of the feature extractor used for optimization",
-                        default="mid_extract")
+                        default="high_extract")

-    parser.add_argument('--th', type=float, default=0.005)
+    parser.add_argument('--th', type=float, default=0.01)
+    parser.add_argument('--max-step', type=int, default=200)
    parser.add_argument('--sd', type=int, default=1e9)
-    parser.add_argument('--lr', type=float, default=1)
+    parser.add_argument('--lr', type=float, default=10)

-    parser.add_argument('--result_directory', type=str, default="../results")
-    parser.add_argument('--seperate_target', action='store_true')
+    parser.add_argument('--separate_target', action='store_true')

+    parser.add_argument('--format', type=str,
+                        help="final image format",
+                        default="jpg")
    return parser.parse_args(argv)


@@ -1,3 +1,5 @@
+import glob
+import gzip
 import json
 import os
 import pickle
@@ -7,12 +9,16 @@ import keras
 import keras.backend as K
 import numpy as np
 import tensorflow as tf
+from align_face import align, aligner
 from keras.applications.vgg16 import preprocess_input
 from keras.layers import Dense, Activation
 from keras.models import Model
 from keras.preprocessing import image
+from keras.utils import get_file
 from keras.utils import to_categorical
+from skimage.transform import resize
 from sklearn.metrics import pairwise_distances
+from PIL import Image, ExifTags


 def clip_img(X, preprocessing='raw'):
@@ -22,6 +28,86 @@ def clip_img(X, preprocessing='raw'):
    return X


+def load_image(path):
+    """Load an image file as an RGB array, honouring its EXIF orientation.
+
+    Args:
+        path: location of the image file.
+
+    Returns:
+        The result of ``image.img_to_array`` on the RGB image, rotated by
+        180/270/90 degrees when the EXIF orientation value is 3/6/8.
+    """
+    orientation_tag = 0x0112  # EXIF tag id for "Orientation"
+    rotation_for = {3: 180, 6: 270, 8: 90}
+
+    # The context manager closes the underlying file; rotate()/convert()
+    # return independent, fully loaded images.
+    with Image.open(path) as img:
+        # _getexif is only defined by some format plugins; fall back to
+        # "no EXIF data" instead of raising AttributeError for the others.
+        read_exif = getattr(img, '_getexif', None)
+        exif = read_exif() if read_exif is not None else None
+        if exif:
+            angle = rotation_for.get(exif.get(orientation_tag))
+            if angle is not None:
+                img = img.rotate(angle, expand=True)
+        img = img.convert('RGB')
+
+    return image.img_to_array(img)
+
+
+class Faces(object):
+    """Crop the faces found in a set of images and paste cloaks back onto them.
+
+    Attributes (all filled by ``__init__``):
+        org_faces: the loaded source images, one per path.
+        cropped_faces: preprocessed 224x224 face crops, padded to square.
+        cropped_faces_shape: (height, width) of every crop before padding.
+        cropped_index: per-crop box from ``align``, used as (x1, y1, x2, y2).
+        callback_idx: per-crop index of the source image it came from.
+        cloaked_cropped_faces: set by the caller; ``None`` until then.
+        cloaked_faces: list of per-image copies that receive the cloaks.
+    """
+
+    def __init__(self, image_paths, sess):
+        self.aligner = aligner(sess)
+        self.org_faces = []
+        self.cropped_faces = []
+        self.cropped_faces_shape = []
+        self.cropped_index = []
+        self.callback_idx = []
+        for i, p in enumerate(image_paths):
+            cur_img = load_image(p)
+            self.org_faces.append(cur_img)
+            align_img = align(cur_img, self.aligner, margin=0.7)
+            if align_img is None:
+                # align() can return None; the image stays in org_faces so
+                # outputs remain aligned with image_paths, it just gets no crops.
+                continue
+            cur_faces = align_img[0]
+            cur_index = align_img[1]
+
+            for face in cur_faces:
+                # Pad bottom/right with zeros to a square before resizing so
+                # the aspect ratio of the face is preserved.
+                long_size = max(face.shape[0], face.shape[1])
+                base = np.zeros((long_size, long_size, 3))
+                base[0:face.shape[0], 0:face.shape[1], :] = face
+                self.cropped_faces_shape.append(face.shape[:-1])
+                self.cropped_faces.append(resize(base, (224, 224)))
+                self.callback_idx.append(i)
+            self.cropped_index.extend(cur_index)
+
+        self.cropped_faces = preprocess_input(np.array(self.cropped_faces))
+        self.cloaked_cropped_faces = None
+        self.cloaked_faces = self._copy_originals()
+
+    def _copy_originals(self):
+        """Return an independent copy of every source image.
+
+        Copying per image (instead of ``np.copy`` on the whole list) works
+        for differently sized images and never aliases ``org_faces``.
+        """
+        return [np.copy(face) for face in self.org_faces]
+
+    def get_faces(self):
+        """Return the preprocessed face crops."""
+        return self.cropped_faces
+
+    def merge_faces(self, cloaks):
+        """Add each cloak perturbation onto its face region in the source image.
+
+        Args:
+            cloaks: one perturbation per entry of ``cropped_faces``, in the
+                same order.
+
+        Returns:
+            ``cloaked_faces``: a list with one array per source image. Images
+            without crops are returned as unmodified copies.
+        """
+        self.cloaked_faces = self._copy_originals()
+
+        for i, org_shape in enumerate(self.cropped_faces_shape):
+            cur_cloak = cloaks[i]
+            # Undo the square padding: resize to the padded square, then cut
+            # away the bottom/right area that was only padding.
+            old_square_shape = max(org_shape[0], org_shape[1])
+            reshape_cloak = resize(cur_cloak, (old_square_shape, old_square_shape))
+            reshape_cloak = reshape_cloak[0:org_shape[0], 0:org_shape[1], :]
+
+            callback_id = self.callback_idx[i]
+            bb = self.cropped_index[i]
+            self.cloaked_faces[callback_id][bb[1]:bb[3], bb[0]:bb[2], :] += reshape_cloak
+
+        return self.cloaked_faces
+
+
 def dump_dictionary_as_json(dict, outfile):
    j = json.dumps(dict)
    with open(outfile, "wb") as f:
@@ -30,10 +116,12 @@ def dump_dictionary_as_json(dict, outfile):

 def fix_gpu_memory(mem_fraction=1):
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
-    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_fraction)
-    tf_config = tf.ConfigProto(gpu_options=gpu_options)
-    tf_config.gpu_options.allow_growth = True
-    tf_config.log_device_placement = False
+    tf_config = None
+    if tf.test.is_gpu_available():
+        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_fraction)
+        tf_config = tf.ConfigProto(gpu_options=gpu_options)
+        tf_config.gpu_options.allow_growth = True
+        tf_config.log_device_placement = False
    init_op = tf.global_variables_initializer()
    sess = tf.Session(config=tf_config)
    sess.run(init_op)
@@ -45,7 +133,6 @@ def load_victim_model(number_classes, teacher_model=None, end2end=False):
    for l in teacher_model.layers:
        l.trainable = end2end
    x = teacher_model.layers[-1].output
-
    x = Dense(number_classes)(x)
    x = Activation('softmax', name="act")(x)
    model = Model(teacher_model.input, x)
@@ -141,6 +228,7 @@ def imagenet_preprocessing(x, data_format=None):

    return x

+
 def imagenet_reverse_preprocessing(x, data_format=None):
    import keras.backend as K
    x = np.array(x)
@@ -185,7 +273,20 @@ def build_bottleneck_model(model, cut_off):


 def load_extractor(name):
-    model = keras.models.load_model("../feature_extractors/{}.h5".format(name))
+    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
+    os.makedirs(model_dir, exist_ok=True)
+    model_file = os.path.join(model_dir, "{}.h5".format(name))
+    if os.path.exists(model_file):
+        model = keras.models.load_model(model_file)
+    else:
+        get_file("{}.h5".format(name), "http://sandlab.cs.uchicago.edu/fawkes/files/{}.h5".format(name),
+                 cache_dir=model_dir, cache_subdir='')
+
+        get_file("{}_emb.p.gz".format(name), "http://sandlab.cs.uchicago.edu/fawkes/files/{}_emb.p.gz".format(name),
+                 cache_dir=model_dir, cache_subdir='')
+
+        model = keras.models.load_model(model_file)
+
    if hasattr(model.layers[-1], "activation") and model.layers[-1].activation == "softmax":
        raise Exception(
            "Given extractor's last layer is softmax, need to remove the top layers to make it into a feature extractor")
@@ -199,11 +300,13 @@ def load_extractor(name):
    return model


+
 def get_dataset_path(dataset):
-    if not os.path.exists("config.json"):
+    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
+    if not os.path.exists(os.path.join(model_dir, "config.json")):
        raise Exception("Please config the datasets before running protection code. See more in README and config.py.")

-    config = json.load(open("config.json", 'r'))
+    config = json.load(open(os.path.join(model_dir, "config.json"), 'r'))
    if dataset not in config:
        raise Exception(
            "Dataset {} does not exist, please download to data/ and add the path to this function... Abort".format(
@@ -217,7 +320,8 @@ def normalize(x):


 def dump_image(x, filename, format="png", scale=False):
-    img = image.array_to_img(x, scale=scale)
+    # img = image.array_to_img(x, scale=scale)
+    img = image.array_to_img(x)
    img.save(filename, format)
    return

@@ -235,9 +339,13 @@ def load_dir(path):


 def load_embeddings(feature_extractors_names):
+    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
    dictionaries = []
    for extractor_name in feature_extractors_names:
-        path2emb = pickle.load(open("../feature_extractors/embeddings/{}_emb_norm.p".format(extractor_name), "rb"))
+        fp = gzip.open(os.path.join(model_dir, "{}_emb.p.gz".format(extractor_name)), 'rb')
+        path2emb = pickle.load(fp)
+        fp.close()
+
        dictionaries.append(path2emb)

    merge_dict = {}
@@ -272,6 +380,8 @@ def calculate_dist_score(a, b, feature_extractors_ls, metric='l2'):


 def select_target_label(imgs, feature_extractors_ls, feature_extractors_names, metric='l2'):
+    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
+
    original_feature_x = extractor_ls_predict(feature_extractors_ls, imgs)

    path2emb = load_embeddings(feature_extractors_names)
@@ -282,37 +392,25 @@ def select_target_label(imgs, feature_extractors_ls, feature_extractors_names, m

    pair_dist = pairwise_distances(original_feature_x, embs, metric)
    max_sum = np.min(pair_dist, axis=0)
-    sorted_idx = np.argsort(max_sum)[::-1]
+    max_id = np.argmax(max_sum)

-    highest_num = 0
-    paired_target_X = None
-    final_target_class_path = None
-    for idx in sorted_idx[:1]:
-        target_class_path = paths[idx]
-        cur_target_X = load_dir(target_class_path)
-        cur_target_X = np.concatenate([cur_target_X, cur_target_X, cur_target_X])
-        cur_tot_sum, cur_paired_target_X = calculate_dist_score(imgs, cur_target_X,
-                                                                feature_extractors_ls,
-                                                                metric=metric)
-        if cur_tot_sum > highest_num:
-            highest_num = cur_tot_sum
-            paired_target_X = cur_paired_target_X
+    image_paths = glob.glob(os.path.join(model_dir, "target_data/{}/*".format(paths[int(max_id)])))
+    target_images = [image.img_to_array(image.load_img(cur_path)) for cur_path in
+                     image_paths]
+    target_images = preprocess_input(np.array([resize(x, (224, 224)) for x in target_images]))

-    np.random.shuffle(paired_target_X)
-    paired_target_X = list(paired_target_X)
-    while len(paired_target_X) < len(imgs):
-        paired_target_X += paired_target_X
-
-    paired_target_X = paired_target_X[:len(imgs)]
-    return np.array(paired_target_X)
+    target_images = list(target_images)
+    while len(target_images) < len(imgs):
+        target_images += target_images

+    target_images = random.sample(target_images, len(imgs))
+    return np.array(target_images)


 class CloakData(object):
    def __init__(self, protect_directory=None, img_shape=(224, 224)):

        self.img_shape = img_shape
-
        # self.train_data_dir, self.test_data_dir, self.number_classes, self.number_samples = get_dataset_path(dataset)
        # self.all_labels = sorted(list(os.listdir(self.train_data_dir)))
        self.protect_directory = protect_directory