Initial Commit

master · wchen342 · 5 years ago · commit 2d92197d6c

14 changed files:

    data_processing/classes.csv (126 lines)
    data_processing/tfrecord.py (64 lines)
    main_single_sk.py (191 lines)
    src_single/config.py (17 lines)
    src_single/graph_supervised.py (665 lines)
    src_single/inception_score.py (105 lines)
    src_single/inception_utils.py (71 lines)
    src_single/inception_v4.py (324 lines)
    src_single/input_pipeline_rand_mix.py (305 lines)
    src_single/models_mru_multi_res.py (401 lines)
    src_single/mru.py (713 lines)
    src_single/sn.py (52 lines)
    src_single/train_single.py (360 lines)
    src_single/vgg.py (276 lines)
data_processing/classes.csv (126 lines)

@@ -0,0 +1,126 @@
Name,ID,Image_num,Used
airplane,n02691156,1434,1
alarm_clock,n02694662,1442,1
ant,n02219486,1656,1
ape,n02470325,1332,1
apple,n07739125,1319,1
armor,n02895154,942,0
axe,n02764044,1254,1
banana,n07753592,1409,1
bat,n02139199,1304,1
bear,n02131653,1688,1
bee,n02206856,1672,1
beetle,n02167151,1077,1
bell,n02824448,1227,1
bench,n02828884,1355,1
bicycle,n02834778,1344,1
blimp,n02850950,1110,1
bread,n07679356,1249,1
butterfly,n02274259,2115,1
cabin,n02932400,1250,1
camel,n02437136,1428,1
candle,n02948072,1789,1
cannon,n02950826,1220,1
car_(sedan),n02958343,1307,1
castle,n02980441,1379,1
cat,n02121620,1485,1
chair,n02738535,1592,1
chicken,n01791625,1210,1
church,n03028079,1329,1
couch,n04256520,1686,1
cow,n01887787,1588,1
crab,n01978287,1214,1
crocodilian,n01697457,1188,1
cup,n03063073,1260,1
deer,n02430045,1680,1
dog,n02103406,1340,1
dolphin,n02068974,1460,1
door,n03222176,1384,1
duck,n01846331,1642,1
elephant,n02503517,1387,1
eyeglasses,n04272054,1280,0
fan,n03271574,1223,1
fish,n01440764,1133,1
flower,n11669921,1924,1
frog,n01639765,1222,1
geyser,n09288635,1585,1
giraffe,n02439033,1256,1
guitar,n02676566,2017,1
hamburger,n07697100,1373,1
hammer,n03481172,1390,1
harp,n03495258,1822,1
hat,n02859184,1319,0
hedgehog,n02346627,1147,1
helicopter,n03512147,1247,1
hermit_crab,n01986214,1430,1
horse,n02374451,1402,1
hot-air_balloon,n02782093,1240,1
hotdog,n07697537,1257,1
hourglass,n03544143,1176,1
jack-o-lantern,n03590841,1712,1
jellyfish,n01910747,1635,1
kangaroo,n01877134,1556,1
knife,n02973904,999,0
lion,n02129165,1795,1
lizard,n01674464,1202,1
lobster,n01983481,1123,1
motorcycle,n03790512,1380,1
mouse,n02330245,1252,1
mushroom,n12997919,1406,1
owl,n01621127,1296,1
parrot,n01816887,1149,1
pear,n07767847,1279,1
penguin,n02055803,1281,1
piano,n03452741,1318,1
pickup_truck,n03930630,1443,1
pig,n02395406,1463,1
pineapple,n07753275,1209,1
pistol,n03948459,1338,1
pizza,n07873807,1289,1
pretzel,n07695742,1263,1
rabbit,n02325366,1280,1
raccoon,n02508021,1722,1
racket,n02772700,1402,0
ray,n01496331,967,1
rhinoceros,n02391994,1496,1
rifle,n02749479,1172,0
rocket,n03773504,1220,1
sailboat,n04128499,1191,1
saw,n02770585,371,0
saxophone,n04141076,1673,0
scissors,n03044934,562,0
scorpion,n01770393,1197,1
sea_turtle,n01663401,1485,1
seagull,n02041246,1413,1
seal,n02076196,1832,1
shark,n01482330,1219,1
sheep,n02411705,1273,1
shoe,n02882894,1381,0
skyscraper,n04233124,1546,1
snail,n01944390,1837,1
snake,n01726692,1289,1
songbird,n01527347,1536,1
spider,n01772222,1249,1
spoon,n03633091,1817,1
squirrel,n02355227,1192,1
starfish,n02317335,1396,1
strawberry,n07745940,1478,1
swan,n01858441,1314,1
sword,n03039493,533,0
table,n03201208,2217,1
tank,n04389033,1488,1
teapot,n04398044,1590,1
teddy_bear,n04399382,0,0
tiger,n02129604,2086,1
tree,n11608250,1148,1
trumpet,n03110669,2092,1
turtle,n01669191,1328,1
umbrella,n04507155,1341,1
violin,n04536866,1330,0
volcano,n09472597,1404,1
wading_bird,n02000954,1178,1
wheelchair,n04576002,1245,0
windmill,n04587404,1899,1
window,n03227184,493,0
wine_bottle,n04591713,1258,1
zebra,n02391049,1474,1

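As a side note (not part of the commit), the Used column is the training-class switch: rows with Used = 0 (armor, knife, saw, sword, teddy_bear, ...) are excluded. A minimal sketch of reading this file with the read_csv helper from data_processing/tfrecord.py below, assuming the repository root is on the import path:

    from data_processing.tfrecord import read_csv

    classes = read_csv('./data_processing/classes.csv')
    used = [c['Name'] for c in classes if int(c['Used']) == 1]
    print('%d of %d classes enabled' % (len(used), len(classes)))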
data_processing/tfrecord.py (64 lines)

@@ -0,0 +1,64 @@
import csv

import numpy as np
import tensorflow as tf
import cv2


def check_repeat(seq):
    seen = set()
    seen_add = seen.add
    seen_twice = set(x for x in seq if x in seen or seen_add(x))
    return list(seen_twice)


def binarize(sketch, threshold=245):
    sketch[sketch < threshold] = 0
    sketch[sketch >= threshold] = 255
    return sketch


def showImg(img):
    cv2.imshow("test", img)
    cv2.waitKey(-1)


def dense_to_one_hot(labels_dense, num_classes):
    """Convert class labels from scalars to one-hot vectors."""
    num_labels = labels_dense.shape[0]
    index_offset = np.arange(num_labels) * num_classes
    labels_one_hot = np.zeros((num_labels, num_classes), dtype=np.int32)
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot


def bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def read_csv(filename):
    with open(filename) as csvfile:
        reader = csv.DictReader(csvfile)
        l = list(reader)
    return l


def read_txt(filename):
    with open(filename) as txtfile:
        lines = txtfile.readlines()
    return [l[:-1] for l in lines]


def split_csvlist(stat_info):
    cat = list(set([item['Category'] for item in stat_info]))
    l = []
    for c in cat:
        li = [item for item in stat_info if item['Category'] == c]
        l.append(li)
    return cat, l

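For illustration only (not in the commit), quick checks of dense_to_one_hot and binarize, assuming integer labels and an 8-bit grayscale sketch:

    import numpy as np
    from data_processing.tfrecord import dense_to_one_hot, binarize

    labels = np.array([0, 2, 1])
    dense_to_one_hot(labels, num_classes=3)
    # -> [[1 0 0]
    #     [0 0 1]
    #     [0 1 0]]

    sketch = np.array([[250, 100], [240, 255]], dtype=np.uint8)
    binarize(sketch)  # in-place threshold at 245 -> [[255 0] [0 255]]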
main_single_sk.py (191 lines)

@@ -0,0 +1,191 @@
import argparse
import importlib
import os
import sys
import shutil
import json

import tensorflow as tf
from time import gmtime, strftime

src_dir = './src_single'


def launch_training(**kwargs):
    # Deal with files and paths
    appendix = kwargs["resume_from"]
    if appendix is None or appendix == '':
        cur_time = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
        log_dir = './log_skgan_' + cur_time
        ckpt_dir = './ckpt_skgan_' + cur_time
        if not os.path.isdir(log_dir) and not os.path.exists(log_dir):
            os.makedirs(log_dir)
        if not os.path.isdir(ckpt_dir) and not os.path.exists(ckpt_dir):
            os.makedirs(ckpt_dir)

        # Copy current scripts in src folder to log dir for record
        if not os.path.exists(src_dir) or not os.path.isdir(src_dir):
            print("src folder does not exist.")
            return
        else:
            for file in os.listdir(src_dir):
                if file.endswith(".py"):
                    shutil.copy(os.path.join(src_dir, file), log_dir)

        kwargs['log_dir'] = log_dir
        kwargs['ckpt_dir'] = ckpt_dir
        appendix = cur_time
        kwargs["resume_from"] = appendix
        kwargs["iter_from"] = 0

        # Save parameters
        with open(os.path.join(log_dir, 'param_%d.json' % 0), 'w') as fp:
            json.dump(kwargs, fp, indent=4)

        sys.path.append(src_dir)
        entry_point_module = kwargs['entry_point']

        from config import Config
        Config.set_from_dict(kwargs)
        print("Launching new train: %s" % cur_time)
    else:
        if len(appendix.split('-')) != 6:
            print("Invalid resume folder")
            return

        log_dir = './log_skgan_' + appendix
        ckpt_dir = './ckpt_skgan_' + appendix

        # Get last parameters (recover entry point module name)
        json_files = [f for f in os.listdir(log_dir) if
                      os.path.isfile(os.path.join(log_dir, f)) and os.path.splitext(f)[1] == '.json']
        iter_starts = max([int(os.path.splitext(filename)[0].split('_')[1]) for filename in json_files])
        with open(os.path.join(log_dir, 'param_%d.json' % iter_starts), 'r') as fp:
            params = json.load(fp)
        entry_point_module = params['entry_point']

        # Recover parameters
        _ignored = ['num_gpu', 'iter_from']
        for k, v in params.items():
            if k not in _ignored:
                kwargs[k] = v

        sys.path.append(log_dir)

        # Get latest checkpoint filename
        # if stage == 1:
        #     ckpt_file = tf.train.latest_checkpoint(stage_1_ckpt_dir)
        # elif stage == 2:
        ckpt_file = tf.train.latest_checkpoint(ckpt_dir)
        if ckpt_file is None:
            raise RuntimeError
        else:
            iter_from = int(os.path.split(ckpt_file)[1].split('-')[1]) + 1

        kwargs['log_dir'] = log_dir
        kwargs['ckpt_dir'] = ckpt_dir
        kwargs['iter_from'] = iter_from

        # Save new set of parameters
        with open(os.path.join(log_dir, 'param_%d.json' % iter_from), 'w') as fp:
            kwargs['entry_point'] = entry_point_module
            json.dump(kwargs, fp, indent=4)

        from config import Config
        Config.set_from_dict(kwargs)
        print("Launching train from checkpoint: %s" % appendix)

    # Launch train
    train_module = importlib.import_module(entry_point_module)
    # from train_paired_aug_multi_gpu import train
    status = train_module.train(**kwargs)
    return status, appendix


def launch_test(**kwargs):
    # Deal with files and paths
    appendix = kwargs["resume_from"]
    if appendix is None or appendix == '' or len(appendix.split('-')) != 6:
        print("Invalid resume folder")
        return

    log_dir = './log_skgan_' + appendix
    ckpt_dir = './ckpt_skgan_' + appendix
    sys.path.append(log_dir)

    # Get latest checkpoint filename
    kwargs['log_dir'] = log_dir
    kwargs['ckpt_dir'] = ckpt_dir

    # Get last parameters (recover entry point module name)
    # Assuming last json file
    json_files = [f for f in os.listdir(log_dir) if
                  os.path.isfile(os.path.join(log_dir, f)) and os.path.splitext(f)[1] == '.json']
    iter_starts = max([int(os.path.splitext(filename)[0].split('_')[1]) for filename in json_files])
    with open(os.path.join(log_dir, 'param_%d.json' % iter_starts), 'r') as fp:
        params = json.load(fp)
    entry_point_module = params['entry_point']

    # Recover parameters
    _ignored = ["num_gpu", 'iter_from']
    for k, v in params.items():
        if k not in _ignored:
            kwargs[k] = v

    from config import Config
    Config.set_from_dict(kwargs)
    print("Launching test from checkpoint: %s" % appendix)

    # Launch test
    train_module = importlib.import_module(entry_point_module)
    train_module.test(**kwargs)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Train or Test model')
    parser.add_argument('--mode', type=str, default="train", help="train or test")
    parser.add_argument('--resume_from', type=str, default='', help="Whether resume last checkpoint from a past run")
    parser.add_argument('--entry_point', type=str, default='train_single', help="name of the training .py file")
    parser.add_argument('--batch_size', default=12, type=int, help='Batch size per gpu')
    parser.add_argument('--max_iter_step', default=300000, type=int, help="Max number of iterations")
    parser.add_argument('--disc_iterations', default=1, type=int, help="Number of discriminator iterations")
    parser.add_argument('--ld', default=10, type=float, help="Gradient penalty lambda hyperparameter")
    parser.add_argument('--optimizer', type=str, default="Adam", help="Optimizer for the graph")
    parser.add_argument('--lr_G', type=float, default=2e-4, help="learning rate for the generator")
    parser.add_argument('--lr_D', type=float, default=4e-4, help="learning rate for the discriminator")
    parser.add_argument('--num_gpu', default=1, type=int, help="Number of GPUs to use")
    parser.add_argument('--distance_map', default=1, type=int, help="Whether using distance maps for sketches")
    parser.add_argument('--small_img', default=1, type=int, help="Whether using 64x64 instead of 256x256")
    parser.add_argument('--extra_info', default="", type=str, help="Extra information saved for record")
    args = parser.parse_args()

    assert args.optimizer in ["RMSprop", "Adam", "AdaDelta", "AdaGrad"], "Unsupported optimizer"

    # Set default params
    d_params = {"resume_from": args.resume_from,
                "entry_point": args.entry_point,
                "batch_size": args.batch_size,
                "max_iter_step": args.max_iter_step,
                "disc_iterations": args.disc_iterations,
                "ld": args.ld,
                "optimizer": args.optimizer,
                "lr_G": args.lr_G,
                "lr_D": args.lr_D,
                "num_gpu": args.num_gpu,
                "distance_map": args.distance_map,
                "small_img": args.small_img,
                "extra_info": args.extra_info,
                }

    if args.mode == 'train':
        # Launch training
        status, appendix = launch_training(**d_params)
        while status == -1:  # NaN during training
            print("Training ended with status -1. Restarting..")
            d_params["resume_from"] = appendix
            # unpack the (status, appendix) tuple so the retry condition stays a scalar
            status, appendix = launch_training(**d_params)
    elif args.mode == 'test':
        launch_test(**d_params)

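Typical invocations, for reference (the timestamp is a hypothetical run id; each run creates ./log_skgan_<time> and ./ckpt_skgan_<time>, and --resume_from expects that six-field timestamp):

    # start a fresh run
    python main_single_sk.py --mode train --batch_size 12 --num_gpu 1
    # resume training from an earlier run
    python main_single_sk.py --mode train --resume_from 2018-05-01-12-00-00
    # evaluate a finished run
    python main_single_sk.py --mode test --resume_from 2018-05-01-12-00-00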
src_single/config.py (17 lines)

@@ -0,0 +1,17 @@
# Global config
class Config(object):
    # global options
    data_format = 'NCHW'
    sn = False
    proj_d = False
    wgan = False
    SPECTRAL_NORM_UPDATE_OPS = "spectral_norm_update_ops"
    pre_calculated_dist_map = True

    @staticmethod
    def set_from_dict(d):
        assert type(d) is dict
        for k, v in d.items():
            setattr(Config, k, v)

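Config is used as a mutable namespace rather than an instance: set_from_dict writes onto the class object itself, so every module that does from config import Config sees the same updated attributes. A small sketch (illustrative, not in the commit):

    from config import Config

    Config.set_from_dict({'batch_size': 12, 'sn': False})
    print(Config.batch_size, Config.data_format)  # 12 NCHW, visible to all importers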
src_single/graph_supervised.py (665 lines)

@@ -0,0 +1,665 @@
import functools

import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import clip_ops

import models_mru_multi_res as models
from input_pipeline_rand_mix import *
from inception_v4 import inception_v4_base, inception_v4, inception_v4_arg_scope
# from vgg import vgg_16, vgg_19, vgg_arg_scope
from sn import spectral_normed_weight
from config import Config

slim = tf.contrib.slim


def dist_map_to_image(input, threshold=0.015):
    ret = tf.cast(1 - tf.cast(tf.less(input + 1, threshold), tf.int32), tf.float32)
    return ret


def compute_gradients(losses, optimizers, var_lists):
    assert len(losses) == len(optimizers) and len(optimizers) == len(var_lists)
    grads = []
    for i in range(len(losses)):
        this_grad = optimizers[i].compute_gradients(losses[i], var_list=var_lists[i])
        grads.append(this_grad)
    return grads


def average_gradients(tower_grads_list):
    """notice: Variable pointers come from the first tower"""
    grads_list = []
    for i in range(len(tower_grads_list)):
        average_grads = []
        tower_grads = tower_grads_list[i]
        num_towers = len(tower_grads)
        for grad_and_vars in zip(*tower_grads):
            grads = []
            grad = 'No Value'
            if grad_and_vars[0][0] is None:
                all_none = True
                for j in range(num_towers):
                    if grad_and_vars[j][0] is not None:
                        all_none = False
                if all_none:
                    grad = None
                else:
                    raise ValueError("None gradient inconsistent between towers.")
            else:
                for g, _ in grad_and_vars:
                    expanded_grad = tf.expand_dims(g, axis=0)
                    grads.append(expanded_grad)
                grad = tf.concat(axis=0, values=grads)
                grad = tf.reduce_mean(grad, axis=0)

            v = grad_and_vars[0][1]
            if isinstance(grad, str):
                raise ValueError("Gradient not defined when averaging.")
            grad_and_var = (grad, v)
            average_grads.append(grad_and_var)
        grads_list.append(average_grads)
    return grads_list


def gather_summaries(max_outputs=100):
    # Image summaries
    orig_img_sum = tf.summary.image("original_img", tf.get_collection("original_img")[0], max_outputs=max_outputs)
    orig_img_sum_d = tf.summary.image("original_img_d", tf.get_collection("original_img_d")[0], max_outputs=max_outputs)
    orig_img_sum2 = tf.summary.image("original_img_2", tf.get_collection("original_img_2")[0], max_outputs=max_outputs)
    img_sum_2t1 = tf.summary.image("img_2_to_1", tf.get_collection("img_2_to_1")[0], max_outputs=max_outputs)
    img_sum_2t1_b = tf.summary.image("img_2_to_1_b", tf.get_collection("img_2_to_1_b")[0], max_outputs=max_outputs)
    if len(tf.get_collection("dist_map_img_2")) > 0:
        dist_map_sum_2 = tf.summary.image("dist_map_img_2", tf.get_collection("dist_map_img_2")[0], max_outputs=max_outputs)

    # Scalar summaries
    tf.summary.scalar("GAN_loss/G", tf.reduce_mean(tf.get_collection("GAN_loss_g")))
    tf.summary.scalar("GAN_loss/D", tf.reduce_mean(tf.get_collection("GAN_loss_d")))
    tf.summary.scalar("GAN_loss/GP", tf.reduce_mean(tf.get_collection("GAN_loss_d_gp")))
    tf.summary.scalar("ACGAN_loss/G", tf.reduce_mean(tf.get_collection("ACGAN_loss_g")))
    tf.summary.scalar("ACGAN_loss/D", tf.reduce_mean(tf.get_collection("ACGAN_loss_d")))
    tf.summary.scalar("direct_loss", tf.reduce_mean(tf.get_collection("direct_loss")))
    tf.summary.scalar("diversity_loss", tf.reduce_mean(tf.get_collection("diversity_loss")))
    tf.summary.scalar("DECAY_loss/G", tf.reduce_mean(tf.get_collection("weight_decay_loss_g")))
    tf.summary.scalar("DECAY_loss/D", tf.reduce_mean(tf.get_collection("weight_decay_loss_d")))
    tf.summary.scalar("sketch_proportion", tf.reduce_mean(tf.get_collection("sketch_proportion")))
    tf.summary.scalar("total_loss/g", tf.reduce_mean(tf.get_collection("total_loss_g")))
    tf.summary.scalar("total_loss/d", tf.reduce_mean(tf.get_collection("total_loss_d")))
    return tf.summary.merge_all()


def gather_losses():
    loss_g = tf.reduce_mean(tf.get_collection("loss_g"))
    loss_d = tf.reduce_mean(tf.get_collection("loss_d"))
    return loss_g, loss_d


def build_multi_tower_graph(images, sketches, images_d,
                            sketches_100,
                            image_paired_class_ids, image_paired_class_ids_d, image_paired_class_ids_100,
                            batch_size, num_gpu, batch_portion, training,
                            learning_rates, counter, proportion,
                            max_iter_step,
                            ld=10,
                            data_format='NCHW', distance_map=True,
                            optimizer='Adam'):
    models.set_param(data_format=data_format)
    tf.add_to_collection("sketch_proportion", proportion)

    # Split the input batch across towers
    with tf.device('/cpu:0'):
        images_list = split_inputs(images, batch_size, batch_portion, num_gpu)
        images_d_list = split_inputs(images_d, batch_size, batch_portion, num_gpu)
        sketches_list = split_inputs(sketches, batch_size, batch_portion, num_gpu)
        image_paired_class_ids_list = split_inputs(image_paired_class_ids, batch_size, batch_portion, num_gpu)
        image_paired_class_ids_d_list = split_inputs(image_paired_class_ids_d, batch_size, batch_portion, num_gpu)
        sketches_100_list = [tf.identity(sketches_100)] * len(batch_portion)
        image_paired_class_ids_100_list = [tf.identity(image_paired_class_ids_100)] * len(batch_portion)

    lr_g = learning_rates['generator']
    lr_d = learning_rates['discriminator']
    optimizer = get_optimizer(optimizer)
    # Learning rate decays linearly with the step counter, floored at 0.2 of the initial value
    decay = tf.maximum(0.2, 1. - (tf.cast(counter, tf.float32) / max_iter_step * 0.9))
    optim_g = optimizer(learning_rate=lr_g * decay)
    optim_d = optimizer(learning_rate=lr_d * decay)

    tower_grads_g = []
    tower_grads_d = []
    for i in range(num_gpu):
        with tf.device('/gpu:%d' % i):
            with tf.name_scope('%s_%d' % ('GPU', i)) as scope:
                print('GPU %d' % i)
                loss_g, loss_d, grad_g, grad_d, inception_gen_out = build_single_graph(
                    images_list[i],
                    sketches_list[i],
                    images_d_list[i],
                    sketches_100_list[i],
                    image_paired_class_ids_list[i],
                    image_paired_class_ids_d_list[i],
                    image_paired_class_ids_100_list[i],
                    batch_size * batch_portion[i],
                    training,
                    ld=ld, data_format=data_format,
                    distance_map=distance_map,
                    optim_g=optim_g,
                    optim_d=optim_d)
                tower_grads_g.append(grad_g)
                tower_grads_d.append(grad_d)

    assert len(tower_grads_g) == len(tower_grads_d)
    if len(tower_grads_d) == 1:
        ave_grad_g = grad_g
        ave_grad_d = grad_d
    else:
        ave_grad_g, ave_grad_d = average_gradients((tower_grads_g, tower_grads_d))

    # Apply gradients
    tf.get_variable_scope()._reuse = False  # Hack to force initialization of optimizer variables

    if Config.sn:
        # Get the update ops
        spectral_norm_update_ops = tf.get_collection(Config.SPECTRAL_NORM_UPDATE_OPS)
    else:
        spectral_norm_update_ops = [tf.no_op()]
    assign_ops = tf.no_op()

    # Clip gradients if using WGAN/DRAGAN
    global_grad_norm_G = None
    global_grad_norm_G_clipped = None
    global_grad_norm_D = None
    global_grad_norm_D_clipped = None

    if not Config.sn:
        max_grad_norm_G = 50.
        max_grad_norm_D = 100.
        hard_clip_norm_G = 5.
        hard_clip_norm_D = 10.

        ave_grad_g_tensors, ave_grad_g_vars = list(zip(*ave_grad_g))
        global_grad_norm_G = clip_ops.global_norm(ave_grad_g_tensors)
        ave_grad_g_tensors, _ = clip_ops.clip_by_global_norm(ave_grad_g_tensors, max_grad_norm_G, global_grad_norm_G)
        ave_grad_g_tensors = [clip_ops.clip_by_norm(t, hard_clip_norm_G) for t in ave_grad_g_tensors]
        ave_grad_g = list(zip(ave_grad_g_tensors, ave_grad_g_vars))

        ave_grad_d_tensors, ave_grad_d_vars = list(zip(*ave_grad_d))
        global_grad_norm_D = clip_ops.global_norm(ave_grad_d_tensors)
        ave_grad_d_tensors, _ = clip_ops.clip_by_global_norm(ave_grad_d_tensors, max_grad_norm_D, global_grad_norm_D)
        ave_grad_d_tensors = [clip_ops.clip_by_norm(t, hard_clip_norm_D) for t in ave_grad_d_tensors]
        ave_grad_d = list(zip(ave_grad_d_tensors, ave_grad_d_vars))

    with tf.control_dependencies(spectral_norm_update_ops):
        opt_g = optimize(ave_grad_g, optim_g, None, 'gradient_norm', global_norm=global_grad_norm_G,
                         global_norm_clipped=global_grad_norm_G_clipped, appendix='_G')
        opt_d = optimize(ave_grad_d, optim_d, None, 'gradient_norm', global_norm=global_grad_norm_D,
                         global_norm_clipped=global_grad_norm_D_clipped, appendix='_D')

    summaries = gather_summaries()
    loss_g, loss_d = gather_losses()

    # Generator output from last tower
    return opt_g, opt_d, loss_g, loss_d, summaries, inception_gen_out


def build_single_graph(images, sketches, images_d,
                       sketches_100,
                       image_data_class_id, image_data_class_id_d, image_data_2_class_id_100,
                       batch_size, training,
                       ld=10,
                       data_format='NCHW', distance_map=True,
                       optim_g=None, optim_d=None):
    def transfer(image_data, labels, num_classes, reuse=False, data_format=data_format, output_channel=3):
        generator_scope = 'generator'
        image_gen, noise = generator(image_data, output_channel=output_channel, num_classes=num_classes,
                                     reuse=reuse, data_format=data_format, labels=labels,
                                     scope_name=generator_scope)
        return image_gen, noise, labels

    models.set_param(data_format=data_format)
    num_classes = get_num_classes()

    ############################# Graph #################################
    # Input
    generator = models.generator
    discriminator = models.critic

    # Two generator passes over the same sketches (different noise draws);
    # the second sample feeds the diversity loss below.
    image_gens, image_gens_noise, image_labels = transfer(sketches, image_data_class_id,
                                                          num_classes=num_classes, reuse=False,
                                                          data_format=data_format,
                                                          output_channel=3)
    image_gens_b, image_gens_noise_b, image_labels_b = transfer(sketches, image_data_class_id,
                                                                num_classes=num_classes, reuse=True,
                                                                data_format=data_format,
                                                                output_channel=3)

    if not training:
        return image_gens, images, sketches

    # Inception Generation
    image_gen_100, _, _ = transfer(sketches_100, image_data_2_class_id_100, num_classes=num_classes,
                                   reuse=True, data_format=data_format, output_channel=3)

    # Discriminator
    # Stage 1
    real_disc_out, real_logit = discriminator(images_d, num_classes=num_classes, labels=image_data_class_id_d,
                                              reuse=False, data_format=data_format, scope_name='discriminator')
    fake_disc_out, fake_logit = discriminator(image_gens, num_classes=num_classes, labels=image_labels,
                                              reuse=True, data_format=data_format, scope_name='discriminator')
    ############################# End Graph ##############################

    loss_g, loss_d = get_losses(discriminator, None,
                                num_classes, data_format, ld,
                                # images
                                images, sketches,
                                images_d,
                                image_gens, image_gens_b,
                                # latent and labels
                                image_data_class_id, image_data_class_id_d,
                                image_gens_noise, image_gens_noise_b,
                                image_labels, image_labels_b,
                                # critic out
                                real_disc_out, fake_disc_out,
                                # logit
                                real_logit, fake_logit,
                                )

    if data_format == 'NCHW':
        tf.add_to_collection("original_img", tf.transpose(images, (0, 2, 3, 1)))
        tf.add_to_collection("original_img_d", tf.transpose(images_d, (0, 2, 3, 1)))
        tf.add_to_collection("original_img_2", tf.transpose(sketches, (0, 2, 3, 1)))
        tf.add_to_collection("img_2_to_1", tf.transpose(image_gens, (0, 2, 3, 1)))
        tf.add_to_collection("img_2_to_1_b", tf.transpose(image_gens_b, (0, 2, 3, 1)))
        if distance_map:
            tf.add_to_collection("dist_map_img_2",
                                 tf.transpose(dist_map_to_image(sketches, threshold=0.025), (0, 2, 3, 1)))

    # Add losses to collections
    tf.add_to_collection("loss_g", loss_g)
    tf.add_to_collection("loss_d", loss_d)

    # Variable Collections
    var_collections = {
        'generator': tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator'),
        'discriminator': tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminator'),
    }

    ############# Reuse Variables for next tower (?) #############
    tf.get_variable_scope().reuse_variables()
    ############# Reuse Variables for next tower (?) #############

    # # Gather summaries from last tower
    # summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

    # Calculate Gradient
    grad_g, grad_d = compute_gradients((loss_g, loss_d),
                                       (optim_g, optim_d),
                                       var_lists=(var_collections['generator'],
                                                  var_collections['discriminator']))

    return loss_g, loss_d, grad_g, grad_d, image_gen_100


def get_losses(discriminator, vae_sampler,
               num_classes, data_format, ld,
               # images
               images, sketches,
               image_d,
               image_gens, image_gens_b,
               # latent and labels
               image_data_class_id, image_data_class_id_d,
               image_gens_noise, image_gens_noise_b,
               image_labels, image_labels_b,
               # critic out
               real_disc_out, fake_disc_out,
               # logit
               real_logit, fake_logit,
               ):
    def perturb(input_data):
        input_dims = len(input_data.get_shape())
        reduce_axes = [0] + list(range(1, input_dims))
        ret = input_data + 0.5 * tf.sqrt(tf.nn.moments(input_data, axes=reduce_axes)[1]) * tf.random_uniform(input_data.shape)
        # ret = input_data + tf.random_normal(input_data.shape, stddev=2.0)
        return ret

    def get_acgan_loss_focal(real_image_logits_out, real_image_label,
                             disc_image_logits_out, condition,
                             num_classes, ld1=1, ld2=0.5, ld_focal=2.):
        loss_ac_d = tf.reduce_mean((1 - tf.reduce_sum(tf.nn.softmax(real_image_logits_out) * tf.squeeze(
            tf.one_hot(real_image_label, num_classes, on_value=1., off_value=0., dtype=tf.float32)), axis=1)) ** ld_focal *
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=real_image_logits_out, labels=real_image_label))
        loss_ac_d = ld1 * loss_ac_d
        loss_ac_g = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=disc_image_logits_out, labels=condition))
        loss_ac_g = ld2 * loss_ac_g
        return loss_ac_g, loss_ac_d

    def get_acgan_loss_orig(real_image_logits_out, real_image_label,
                            disc_image_logits_out, condition, num_classes,
                            ld1=1, ld2=0.5):
        loss_ac_d = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=real_image_logits_out, labels=real_image_label))
        loss_ac_d = ld1 * loss_ac_d
        loss_ac_g = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=disc_image_logits_out, labels=condition))
        loss_ac_g = ld2 * loss_ac_g
        return loss_ac_g, loss_ac_d

    def get_loss_wgan_global_gp(discriminator, data_format,
                                fake_data_out, fake_data_out_, real_data_out,
                                fake_data, real_data,
                                scope=None, ld=ld):
        assert scope is not None
        assert real_data.get_shape()[0] == fake_data.get_shape()[0]
        ndim = len(real_data.get_shape())
        assert ndim == 4
        if data_format == 'NCHW':
            concat_axis = 1
        else:
            concat_axis = 3

        loss_g = -tf.reduce_mean(fake_data_out_)
        loss_d = tf.reduce_mean(fake_data_out) - tf.reduce_mean(real_data_out)

        # Gradient penalty, evaluated on random interpolations of real/fake pairs
        batch_size = int(real_data.get_shape()[0])
        alpha = tf.random_uniform(shape=[batch_size, 1, 1, 1] if ndim == 4 else [batch_size, 1],
                                  minval=0., maxval=1., dtype=tf.float32)
        diff = fake_data - real_data
        interp = real_data + (alpha * diff)
        gradients = tf.gradients(discriminator(interp, num_classes=num_classes, reuse=True,
                                               data_format=data_format, scope_name=scope)[0],
                                 [interp])[0]
        slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3] if ndim == 4 else [1]))
        gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)
        tf.add_to_collection("GAN_loss_d_gp", gradient_penalty)

        loss_d += ld * gradient_penalty
        return loss_g, loss_d

    def get_loss_wgan_sn(discriminator, data_format,
                         fake_data_out, fake_data_out_, real_data_out,
                         fake_data, real_data,
                         scope=None):
        assert scope is not None
        assert real_data.get_shape()[0] == fake_data.get_shape()[0]
        ndim = len(real_data.get_shape())
        assert ndim == 4
        if data_format == 'NCHW':
            concat_axis = 1
        else:
            concat_axis = 3

        loss_g = tf.reduce_mean(tf.nn.softplus(-fake_data_out_))
        loss_d = tf.reduce_mean(tf.nn.softplus(fake_data_out)) + tf.reduce_mean(tf.nn.softplus(-real_data_out))

        # # Gradient penalty
        # batch_size = int(real_data.get_shape()[0])
        # alpha = tf.random_uniform(shape=[batch_size, 1, 1, 1] if ndim == 4 else [batch_size, 1],
        #                           minval=0., maxval=1., dtype=tf.float32)
        # diff = fake_data - real_data
        # interp = real_data + (alpha * diff)
        # gradients = tf.gradients(discriminator(interp, num_classes=num_classes, reuse=True,
        #                                        data_format=data_format, scope_name=scope)[0],
        #                          [interp])[0]
        # slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3] if ndim == 4 else [1]))
        # gradient_penalty = tf.reduce_mean(tf.maximum(0., slopes - 1.) ** 2)
        # tf.add_to_collection("GAN_loss_d_gp", gradient_penalty)
        #
        # loss_d += ld * gradient_penalty
        return loss_g, loss_d

    def get_loss_original_gan_local_gp_one_side_multi(discriminator, data_format,
                                                      fake_data_out, fake_data_out_, real_data_out,
                                                      fake_data, real_data,
                                                      scope=None, ld=ld):
        assert scope is not None
        # assert real_data.get_shape()[0] == fake_data.get_shape()[0]
        ndim = len(real_data.get_shape())
        ndim_out = len(fake_data_out.get_shape())
        assert ndim == 4
        assert ndim_out == 4 or ndim_out == 2
        if ndim_out == 4:
            sum_axis = (1, 2, 3)
        else:
            sum_axis = 1
        if data_format == 'NCHW':
            concat_axis = 1
        else:
            concat_axis = 3

        loss_d_fake = tf.reduce_mean(tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(
            logits=fake_data_out, labels=tf.zeros_like(fake_data_out)), axis=sum_axis))
        loss_d_real = tf.reduce_mean(tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(
            logits=real_data_out, labels=tf.ones_like(real_data_out)), axis=sum_axis))
        loss_g_fake = tf.reduce_mean(tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(
            logits=fake_data_out, labels=tf.ones_like(fake_data_out)), axis=sum_axis))

        loss_g = loss_g_fake
        loss_d = loss_d_fake + loss_d_real
        loss_d /= 2

        batch_size = int(real_data.get_shape()[0])

        # One-sided gradient penalty on perturbations around the real data
        alpha = tf.random_uniform(shape=[batch_size, 1, 1, 1] if ndim == 4 else [batch_size, 1],
                                  minval=0., maxval=1., dtype=tf.float32)
        diff = perturb(real_data) - real_data
        interp = real_data + (alpha * diff)
        gradients = tf.gradients(discriminator(interp, num_classes=num_classes, reuse=True,
                                               data_format=data_format, scope_name=scope)[0],
                                 [interp])[0]
        slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3] if ndim == 4 else [1]))
        gradient_penalty = tf.reduce_mean(tf.maximum(0., slopes - 1.) ** 2)
        loss_d += ld * gradient_penalty
        return loss_g, loss_d

    def build_inception(inputs, reuse=True, scope='InceptionV4'):
        is_training = False
        arg_scope = inception_v4_arg_scope(weight_decay=0.0)
        with slim.arg_scope(arg_scope):
            with tf.variable_scope(scope, 'InceptionV4', [inputs], reuse=reuse) as scope:
                with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training):
                    logits, end_points = inception_v4_base(inputs, final_endpoint='Mixed_5b', scope=scope)
        return [end_points['Conv2d_2a_3x3'], end_points['Mixed_4a'], end_points['Mixed_5b']]

    def build_vgg(inputs, reuse=True, scope='vgg_16', num=0):
        def get_endpoint(end_points, name):
            for key in end_points.keys():
                if name in key:
                    return end_points[key]

        is_training = False
        arg_scope = vgg_arg_scope(weight_decay=0.0)
        with slim.arg_scope(arg_scope):
            logits, end_points, my_end_points = vgg_16(inputs, is_training=is_training,
                                                       reuse=reuse, scope=scope, num=num)
        return [get_endpoint(end_points, 'conv1_2'), get_endpoint(end_points, 'conv2_2'),
                get_endpoint(end_points, 'conv3_2'), get_endpoint(end_points, 'conv4_2'), ]

    def get_perceptual_loss(image1, image2, data_format, type="Inception", reuse=True):
        assert data_format == 'NCHW'
        image1 = tf.transpose(image1, (0, 2, 3, 1))
        image2 = tf.transpose(image2, (0, 2, 3, 1))
        if type == "Inception":
            # Normalize to 0-1
            image1 = (image1 + 1) / 2.
            image2 = (image2 + 1) / 2.
            dim = 299
            # Resize to 299 x 299
            image1 = tf.image.resize_bilinear(image1, [dim, dim])
            image2 = tf.image.resize_bilinear(image2, [dim, dim])
            image1_lys = build_inception(image1, reuse=reuse)
            image2_lys = build_inception(image2)
        elif type == "vgg":
            image_size = image1.get_shape().as_list()
            dim = 224
            _R_MEAN = tf.constant(123.68, shape=[image_size[0], dim, dim, 1], dtype=tf.float32)
            _G_MEAN = tf.constant(116.78, shape=[image_size[0], dim, dim, 1], dtype=tf.float32)
            _B_MEAN = tf.constant(103.94, shape=[image_size[0], dim, dim, 1], dtype=tf.float32)
            _MEAN = tf.concat([_R_MEAN, _G_MEAN, _B_MEAN], axis=3)
            # Normalize to 0-255
            image1 = (image1 + 1) * 255. / 2.
            image2 = (image2 + 1) * 255. / 2.
            # Resize to 224 x 224
            image1 = tf.image.resize_bilinear(image1, [dim, dim])
            image2 = tf.image.resize_bilinear(image2, [dim, dim])
            # Subtract mean
            image1 -= _MEAN
            image2 -= _MEAN
            image1_lys = build_vgg(image1, reuse=reuse, num=0)
            image2_lys = build_vgg(image2, num=1)
        else:
            raise ValueError("Network type unknown.")

        tf.add_to_collection("inception_layer_1_1", image1_lys[0])
        tf.add_to_collection("inception_layer_1_2", image1_lys[1])
        tf.add_to_collection("inception_layer_1_3", image1_lys[2])
        tf.add_to_collection("inception_layer_2_1", image2_lys[0])
        tf.add_to_collection("inception_layer_2_2", image2_lys[1])
        tf.add_to_collection("inception_layer_2_3", image2_lys[2])

        loss_perceptual = 0.
        for i in range(len(image2_lys)):
            loss_perceptual += tf.reduce_mean(tf.abs(image2_lys[i] - image1_lys[i]))  # L1
            # loss_perceptual += coeffs[i] * tf.sqrt(tf.reduce_sum(tf.square(image2_lys[i] - image1_lys[i]), axis=[1, 2, 3]))  # L2
            # loss_perceptual = coeffs[i] * models.vae_loss_reconstruct(image2_lys[i], image1_lys[i])  # log-likelihood
        return loss_perceptual

    if Config.sn:
        get_gan_loss = get_loss_wgan_sn
    else:
        if Config.wgan:
            get_gan_loss = get_loss_wgan_global_gp
        else:
            get_gan_loss = get_loss_original_gan_local_gp_one_side_multi

    # GAN Loss, current stage
    loss_g_gan, loss_d_gan = get_gan_loss(discriminator, data_format,
                                          fake_disc_out,
                                          fake_disc_out,
                                          real_disc_out,
                                          image_gens,
                                          image_d,
                                          scope='discriminator')
    tf.add_to_collection("GAN_loss_g", loss_g_gan)
    tf.add_to_collection("GAN_loss_d", loss_d_gan)

    # ACGAN loss
    if not Config.proj_d:
        loss_g_ac, loss_d_ac = get_acgan_loss_focal(real_logit, image_data_class_id_d,
                                                    fake_logit, image_labels,
                                                    num_classes=num_classes)
        tf.add_to_collection("ACGAN_loss_g", loss_g_ac)
        tf.add_to_collection("ACGAN_loss_d", loss_d_ac)
        loss_g_gan += loss_g_ac
        loss_d_gan += loss_d_ac

    # Direct loss
    loss_gt = 0.
    loss_gt += tf.losses.absolute_difference(images, image_gens)  # L1
    loss_gt += 0.3 * get_perceptual_loss(images, image_gens,
                                         data_format=data_format, type="Inception", reuse=False)  # Perceptual
    tf.add_to_collection("direct_loss", loss_gt)

    # Diversity loss
    loss_dv = 0.
    this_loss_dv = tf.abs(image_gens - image_gens_b)  # L1
    this_loss_dv = this_loss_dv / tf.reshape(tf.norm(image_gens_noise - image_gens_noise_b, axis=1), (-1, 1, 1, 1))
    loss_dv -= tf.reduce_mean(this_loss_dv)
    tf.add_to_collection("diversity_loss", loss_dv)

    # Regularization/Weight Decay loss
    loss_decay_g = tf.losses.get_regularization_loss(scope='generator')
    loss_decay_d = tf.losses.get_regularization_loss(scope='discriminator')
    tf.add_to_collection("weight_decay_loss_g", loss_decay_g)
    tf.add_to_collection("weight_decay_loss_d", loss_decay_d)

    coeff_gt = 10
    coeff_dv = 10
    coeff_decay = 1
    loss_g = loss_g_gan + coeff_gt * loss_gt + coeff_dv * loss_dv + coeff_decay * loss_decay_g
    loss_d = loss_d_gan + coeff_decay * loss_decay_d
    tf.add_to_collection("total_loss_g", loss_g)
    tf.add_to_collection("total_loss_d", loss_d)
    return loss_g, loss_d


def get_optimizer(optimizer_name, **kwargs):
    if optimizer_name.lower() == 'RMSProp'.lower():
        return functools.partial(tf.train.RMSPropOptimizer, decay=0.9, momentum=0.0, epsilon=1e-10)
    elif optimizer_name.lower() == 'Adam'.lower():
        return functools.partial(tf.train.AdamOptimizer, beta1=0., beta2=0.9)
        # return functools.partial(tf.train.AdamOptimizer, beta1=0.5, beta2=0.9)
    elif optimizer_name.lower() == 'AdaDelta'.lower():
        return tf.train.AdadeltaOptimizer
    elif optimizer_name.lower() == 'AdaGrad'.lower():
        return tf.train.AdagradOptimizer


def optimize(gradients, optim, global_step, summaries, global_norm=None, global_norm_clipped=None, appendix=''):
    """Modified from sugartensor"""
    # Add Summary
    if summaries is None:
        summaries = ["loss", "learning_rate"]
    if "gradient_norm" in summaries:
        if global_norm is None:
            tf.summary.scalar("global_norm/gradient_norm" + appendix,
                              clip_ops.global_norm(list(zip(*gradients))[0]))
        else:
            tf.summary.scalar("global_norm/gradient_norm" + appendix,
                              global_norm)
        if global_norm_clipped is not None:
            tf.summary.scalar("global_norm/gradient_norm_clipped" + appendix,
                              global_norm_clipped)

    # Add histograms for variables, gradients and gradient norms.
    for gradient, variable in gradients:
        if isinstance(gradient, ops.IndexedSlices):
            grad_values = gradient.values
        else:
            grad_values = gradient
        if grad_values is not None:
            var_name = variable.name.replace(":", "_")
            if "gradients" in summaries:
                tf.summary.histogram("gradients/%s" % var_name, grad_values)
            if "gradient_norm" in summaries:
                tf.summary.scalar("gradient_norm/%s" % var_name,
                                  clip_ops.global_norm([grad_values]))

    # Gradient Update OP
    return optim.apply_gradients(gradients, global_step=global_step)

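For reference, get_loss_wgan_global_gp above is the WGAN-GP objective of Gulrajani et al. (2017), with interpolates \hat{x} = x + \alpha(\tilde{x} - x), \alpha \sim U[0,1], sampled along real-fake pairs:

    L_D = \mathbb{E}_{\tilde{x} \sim P_g}[D(\tilde{x})] - \mathbb{E}_{x \sim P_r}[D(x)] + \lambda \, \mathbb{E}_{\hat{x}}\big[(\lVert \nabla_{\hat{x}} D(\hat{x}) \rVert_2 - 1)^2\big]
    L_G = -\mathbb{E}_{\tilde{x} \sim P_g}[D(\tilde{x})]

where ld in the code plays the role of \lambda (default 10, from the --ld flag).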
src_single/inception_score.py (105 lines)

@@ -0,0 +1,105 @@
# From https://github.com/openai/improved-gan/blob/master/inception_score/model.py
# Code derived from tensorflow/tensorflow/models/image/imagenet/classify_image.py
import os.path
import sys
import tarfile
import numpy as np
from six.moves import urllib
import tensorflow as tf
import glob
import scipy.misc
import math

MODEL_DIR = './inception_model/imagenet'
DATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz'
softmax = None
prefix = 'Inception'

config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.9


# Call this function with a list of images. Each element should be a
# numpy array with values ranging from 0 to 255.
def get_inception_score(images, sess, splits=10):
    assert (type(images) == list)
    assert (type(images[0]) == np.ndarray)
    assert (len(images[0].shape) == 3)
    # assert (np.max(images[0]) > 10)
    # assert (np.min(images[0]) >= 0.0)
    inps = []
    for img in images:
        img = img.astype(np.float32)
        inps.append(np.expand_dims(img, 0))
    bs = 100
    preds = []
    n_batches = int(math.ceil(float(len(inps)) / float(bs)))
    for i in range(n_batches):
        # sys.stdout.write(".")
        # sys.stdout.flush()
        inp = inps[(i * bs):min((i + 1) * bs, len(inps))]
        inp = np.concatenate(inp, 0)
        pred = sess.run(softmax, {prefix + 'ExpandDims:0': inp})
        preds.append(pred)
    preds = np.concatenate(preds, 0)
    scores = []
    for i in range(splits):
        part = preds[(i * preds.shape[0] // splits):((i + 1) * preds.shape[0] // splits), :]
        kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0)))
        kl = np.mean(np.sum(kl, 1))
        scores.append(np.exp(kl))
    return np.mean(scores), np.std(scores)


# This function is called automatically.
def _init_inception():
    global softmax, prefix
    if not os.path.exists(MODEL_DIR):
        os.makedirs(MODEL_DIR)
    filename = DATA_URL.split('/')[-1]
    filepath = os.path.join(MODEL_DIR, filename)
    if not os.path.exists(filepath):
        def _progress(count, block_size, total_size):
            sys.stdout.write('\r>> Downloading %s %.1f%%' % (
                filename, float(count * block_size) / float(total_size) * 100.0))
            sys.stdout.flush()

        filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
        print()
        statinfo = os.stat(filepath)
        print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
    tarfile.open(filepath, 'r:gz').extractall(MODEL_DIR)
    with tf.gfile.FastGFile(os.path.join(
            MODEL_DIR, 'classify_image_graph_def.pb'), 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        for node in graph_def.node:
            node.device = "/gpu:2"
        _ = tf.import_graph_def(graph_def, name=prefix)
    if prefix[-1] != '/':
        prefix += '/'
    # Works with an arbitrary minibatch size.
    with tf.Session(config=config) as sess:
        pool3 = sess.graph.get_tensor_by_name(prefix + 'pool_3:0')
        ops = pool3.graph.get_operations()
        for op_idx, op in enumerate(ops):
            for o in op.outputs:
                shape = o.get_shape()
                shape = [s.value for s in shape]
                new_shape = []
                for j, s in enumerate(shape):
                    if s == 1 and j == 0:
                        new_shape.append(None)
                    else:
                        new_shape.append(s)
                o._shape = tf.TensorShape(new_shape)
        w = sess.graph.get_operation_by_name(prefix + "softmax/logits/MatMul").inputs[1]
        logits = tf.matmul(tf.squeeze(pool3), w)
        softmax = tf.nn.softmax(logits)


if softmax is None:
    _init_inception()

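get_inception_score implements the standard Inception Score of Salimans et al. (2016): with p(y|x) the Inception softmax prediction for image x and p(y) its mean over a split,

    \mathrm{IS} = \exp\big( \mathbb{E}_{x} \, D_{KL}\big( p(y \mid x) \,\Vert\, p(y) \big) \big)

computed independently per split; the function returns the mean and standard deviation across the splits (10 by default).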
src_single/inception_utils.py (71 lines)

@@ -0,0 +1,71 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains common code shared by all inception models.

Usage of arg scope:
  with slim.arg_scope(inception_arg_scope()):
    logits, end_points = inception.inception_v3(images, num_classes,
                                                is_training=is_training)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

slim = tf.contrib.slim


def inception_arg_scope(weight_decay=0.00004,
                        use_batch_norm=True,
                        batch_norm_decay=0.9997,
                        batch_norm_epsilon=0.001):
    """Defines the default arg scope for inception models.

    Args:
      weight_decay: The weight decay to use for regularizing the model.
      use_batch_norm: If `True`, batch_norm is applied after each convolution.
      batch_norm_decay: Decay for batch norm moving average.
      batch_norm_epsilon: Small float added to variance to avoid dividing by zero
        in batch norm.

    Returns:
      An `arg_scope` to use for the inception models.
    """
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': batch_norm_decay,
        # epsilon to prevent 0s in variance.
        'epsilon': batch_norm_epsilon,
        # collection containing update_ops.
        'updates_collections': tf.GraphKeys.UPDATE_OPS,
    }
    if use_batch_norm:
        normalizer_fn = slim.batch_norm
        normalizer_params = batch_norm_params
    else:
        normalizer_fn = None
        normalizer_params = {}
    # Set weight_decay for weights in Conv and FC layers.
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_regularizer=slim.l2_regularizer(weight_decay)):
        with slim.arg_scope(
                [slim.conv2d],
                weights_initializer=slim.variance_scaling_initializer(),
                activation_fn=tf.nn.relu,
                normalizer_fn=normalizer_fn,
                normalizer_params=normalizer_params) as sc:
            return sc

src_single/inception_v4.py (324 lines)

@@ -0,0 +1,324 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the definition of the Inception V4 architecture.

As described in http://arxiv.org/abs/1602.07261.
  Inception-v4, Inception-ResNet and the Impact of Residual Connections
  on Learning
  Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

import inception_utils

slim = tf.contrib.slim


def block_inception_a(inputs, scope=None, reuse=None):
    """Builds Inception-A block for Inception v4 network."""
    # By default use stride=1 and SAME padding
    with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                        stride=1, padding='SAME'):
        with tf.variable_scope(scope, 'BlockInceptionA', [inputs], reuse=reuse):
            with tf.variable_scope('Branch_0'):
                branch_0 = slim.conv2d(inputs, 96, [1, 1], scope='Conv2d_0a_1x1')
            with tf.variable_scope('Branch_1'):
                branch_1 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1')
                branch_1 = slim.conv2d(branch_1, 96, [3, 3], scope='Conv2d_0b_3x3')
            with tf.variable_scope('Branch_2'):
                branch_2 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1')
                branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')
                branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3')
            with tf.variable_scope('Branch_3'):
                branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
                branch_3 = slim.conv2d(branch_3, 96, [1, 1], scope='Conv2d_0b_1x1')
            return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])


def block_reduction_a(inputs, scope=None, reuse=None):
    """Builds Reduction-A block for Inception v4 network."""
    # By default use stride=1 and SAME padding
    with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                        stride=1, padding='SAME'):
        with tf.variable_scope(scope, 'BlockReductionA', [inputs], reuse=reuse):
            with tf.variable_scope('Branch_0'):
                branch_0 = slim.conv2d(inputs, 384, [3, 3], stride=2, padding='VALID',
                                       scope='Conv2d_1a_3x3')
            with tf.variable_scope('Branch_1'):
                branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
                branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3')
                branch_1 = slim.conv2d(branch_1, 256, [3, 3], stride=2,
                                       padding='VALID', scope='Conv2d_1a_3x3')
            with tf.variable_scope('Branch_2'):
                branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID',
                                           scope='MaxPool_1a_3x3')
            return tf.concat(axis=3, values=[branch_0, branch_1, branch_2])


def block_inception_b(inputs, scope=None, reuse=None):
    """Builds Inception-B block for Inception v4 network."""
    # By default use stride=1 and SAME padding
    with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                        stride=1, padding='SAME'):
        with tf.variable_scope(scope, 'BlockInceptionB', [inputs], reuse=reuse):
            with tf.variable_scope('Branch_0'):
                branch_0 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
            with tf.variable_scope('Branch_1'):
                branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
                branch_1 = slim.conv2d(branch_1, 224, [1, 7], scope='Conv2d_0b_1x7')
                branch_1 = slim.conv2d(branch_1, 256, [7, 1], scope='Conv2d_0c_7x1')
            with tf.variable_scope('Branch_2'):
                branch_2 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
                branch_2 = slim.conv2d(branch_2, 192, [7, 1], scope='Conv2d_0b_7x1')
                branch_2 = slim.conv2d(branch_2, 224, [1, 7], scope='Conv2d_0c_1x7')
                branch_2 = slim.conv2d(branch_2, 224, [7, 1], scope='Conv2d_0d_7x1')
                branch_2 = slim.conv2d(branch_2, 256, [1, 7], scope='Conv2d_0e_1x7')
            with tf.variable_scope('Branch_3'):
                branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
                branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
            return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])


def block_reduction_b(inputs, scope=None, reuse=None):
    """Builds Reduction-B block for Inception v4 network."""
    # By default use stride=1 and SAME padding
    with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                        stride=1, padding='SAME'):
        with tf.variable_scope(scope, 'BlockReductionB', [inputs], reuse=reuse):
            with tf.variable_scope('Branch_0'):
                branch_0 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
                branch_0 = slim.conv2d(branch_0, 192, [3, 3], stride=2,
                                       padding='VALID', scope='Conv2d_1a_3x3')
            with tf.variable_scope('Branch_1'):
                branch_1 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1')
                branch_1 = slim.conv2d(branch_1, 256, [1, 7], scope='Conv2d_0b_1x7')
                branch_1 = slim.conv2d(branch_1, 320, [7, 1], scope='Conv2d_0c_7x1')
                branch_1 = slim.conv2d(branch_1, 320, [3, 3], stride=2,
                                       padding='VALID', scope='Conv2d_1a_3x3')
            with tf.variable_scope('Branch_2'):
                branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID',
                                           scope='MaxPool_1a_3x3')
            return tf.concat(axis=3, values=[branch_0, branch_1, branch_2])


def block_inception_c(inputs, scope=None, reuse=None):
    """Builds Inception-C block for Inception v4 network."""
    # By default use stride=1 and SAME padding
    with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                        stride=1, padding='SAME'):
        with tf.variable_scope(scope, 'BlockInceptionC', [inputs], reuse=reuse):
            with tf.variable_scope('Branch_0'):
                branch_0 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1')
            with tf.variable_scope('Branch_1'):
                branch_1 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
                branch_1 = tf.concat(axis=3, values=[
                    slim.conv2d(branch_1, 256, [1, 3], scope='Conv2d_0b_1x3'),
                    slim.conv2d(branch_1, 256, [3, 1], scope='Conv2d_0c_3x1')])
            with tf.variable_scope('Branch_2'):
                branch_2 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
                branch_2 = slim.conv2d(branch_2, 448, [3, 1], scope='Conv2d_0b_3x1')
                branch_2 = slim.conv2d(branch_2, 512, [1, 3], scope='Conv2d_0c_1x3')
                branch_2 = tf.concat(axis=3, values=[
                    slim.conv2d(branch_2, 256, [1, 3], scope='Conv2d_0d_1x3'),
                    slim.conv2d(branch_2, 256, [3, 1], scope='Conv2d_0e_3x1')])
            with tf.variable_scope('Branch_3'):
                branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
                branch_3 = slim.conv2d(branch_3, 256, [1, 1], scope='Conv2d_0b_1x1')
            return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])


def inception_v4_base(inputs, final_endpoint='Mixed_7d', scope=None):
    """Creates the Inception V4 network up to the given final endpoint.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      final_endpoint: specifies the endpoint to construct the network up to.
        It can be one of [ 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
        'Mixed_3a', 'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
        'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e',
        'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c',
        'Mixed_7d']
      scope: Optional variable_scope.

    Returns:
      logits: the logits outputs of the model.
      end_points: the set of end_points from the inception model.

    Raises:
      ValueError: if final_endpoint is not set to one of the predefined values.
    """
    end_points = {}

    def add_and_check_final(name, net):
        end_points[name] = net
        return name == final_endpoint

    with tf.variable_scope(scope, 'InceptionV4', [inputs]):
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1, padding='SAME'):
            # 299 x 299 x 3
            net = slim.conv2d(inputs, 32, [3, 3], stride=2,
                              padding='VALID', scope='Conv2d_1a_3x3')
            if add_and_check_final('Conv2d_1a_3x3', net): return net, end_points
            # 149 x 149 x 32
            net = slim.conv2d(net, 32, [3, 3], padding='VALID',
                              scope='Conv2d_2a_3x3')
            if add_and_check_final('Conv2d_2a_3x3', net): return net, end_points
            # 147 x 147 x 32
            net = slim.conv2d(net, 64, [3, 3], scope='Conv2d_2b_3x3')
            if add_and_check_final('Conv2d_2b_3x3', net): return net, end_points
            # 147 x 147 x 64
            with tf.variable_scope('Mixed_3a'):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                               scope='MaxPool_0a_3x3')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net, 96, [3, 3], stride=2, padding='VALID',
                                           scope='Conv2d_0a_3x3')
                net = tf.concat(axis=3, values=[branch_0, branch_1])
                if add_and_check_final('Mixed_3a', net): return net, end_points
            # 73 x 73 x 160
            with tf.variable_scope('Mixed_4a'):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
                    branch_0 = slim.conv2d(branch_0, 96, [3, 3], padding='VALID',
                                           scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1, 64, [1, 7], scope='Conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1, 64, [7, 1], scope='Conv2d_0c_7x1')
                    branch_1 = slim.conv2d(branch_1, 96, [3, 3], padding='VALID',
                                           scope='Conv2d_1a_3x3')
                net = tf.concat(axis=3, values=[branch_0, branch_1])
                if add_and_check_final('Mixed_4a', net): return net, end_points
            # 71 x 71 x 192
            with tf.variable_scope('Mixed_5a'):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net, 192, [3, 3], stride=2, padding='VALID',
                                           scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                               scope='MaxPool_1a_3x3')
                net = tf.concat(axis=3, values=[branch_0, branch_1])
                if add_and_check_final('Mixed_5a', net): return net, end_points
            # 35 x 35 x 384
            # 4 x Inception-A blocks
            for idx in range(4):
                block_scope = 'Mixed_5' + chr(ord('b') + idx)
                net = block_inception_a(net, block_scope)
                if add_and_check_final(block_scope, net): return net, end_points
            # 35 x 35 x 384
            # Reduction-A block
            net = block_reduction_a(net, 'Mixed_6a')
            if add_and_check_final('Mixed_6a', net): return net, end_points
            # 17 x 17 x 1024
            # 7 x Inception-B blocks
            for idx in range(7):
                block_scope = 'Mixed_6' + chr(ord('b') + idx)
                net = block_inception_b(net, block_scope)
                if add_and_check_final(block_scope, net): return net, end_points
            # 17 x 17 x 1024
            # Reduction-B block
            net = block_reduction_b(net, 'Mixed_7a')
            if add_and_check_final('Mixed_7a', net): return net, end_points
            # 8 x 8 x 1536
            # 3 x Inception-C blocks
            for idx in range(3):
                block_scope = 'Mixed_7' + chr(ord('b') + idx)
                net = block_inception_c(net, block_scope)
                if add_and_check_final(block_scope, net): return net, end_points
    raise ValueError('Unknown final endpoint %s' % final_endpoint)


def inception_v4(inputs, num_classes=1001, is_training=True,
                 dropout_keep_prob=0.8,
                 reuse=None,
                 scope='InceptionV4',
                 create_aux_logits=True):
    """Creates the Inception V4 model.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      num_classes: number of predicted classes.
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse 'scope' must be given.
      scope: Optional variable_scope.
      create_aux_logits: Whether to include the auxiliary logits.

    Returns:
      logits: the logits outputs of the model.
      end_points: the set of end_points from the inception model.
    """
    end_points = {}
    with tf.variable_scope(scope, 'InceptionV4', [inputs], reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = inception_v4_base(inputs, scope=scope)

            with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                                stride=1, padding='SAME'):
                # Auxiliary Head logits
                if create_aux_logits:
                    with tf.variable_scope('AuxLogits'):
                        # 17 x 17 x 1024
                        aux_logits = end_points['Mixed_6h']
                        aux_logits = slim.avg_pool2d(aux_logits, [5, 5], stride=3,
                                                     padding='VALID',
                                                     scope='AvgPool_1a_5x5')
                        aux_logits = slim.conv2d(aux_logits, 128, [1, 1],
                                                 scope='Conv2d_1b_1x1')
                        aux_logits = slim.conv2d(aux_logits, 768,
                                                 aux_logits.get_shape()[1:3],
                                                 padding='VALID', scope='Conv2d_2a')
                        aux_logits = slim.flatten(aux_logits)
                        aux_logits = slim.fully_connected(aux_logits, num_classes,
                                                          activation_fn=None,
                                                          scope='Aux_logits')
                        end_points['AuxLogits'] = aux_logits

                # Final pooling and prediction
                with tf.variable_scope('Logits'):
                    # 8 x 8 x 1536
                    net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
                                          scope='AvgPool_1a')
                    # 1 x 1 x 1536
                    net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b')
                    net = slim.flatten(net, scope='PreLogitsFlatten')
                    end_points['PreLogitsFlatten'] = net
                    # 1536
                    logits = slim.fully_connected(net, num_classes, activation_fn=None,
                                                  scope='Logits')
                    end_points['Logits'] = logits
                    end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions')
        return logits, end_points


inception_v4.default_image_size = 299

inception_v4_arg_scope = inception_utils.inception_arg_scope

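A minimal sketch (illustrative, not in the commit) of pulling intermediate endpoints from inception_v4_base for feature matching, mirroring build_inception in graph_supervised.py above:

    import tensorflow as tf
    from inception_v4 import inception_v4_base, inception_v4_arg_scope

    slim = tf.contrib.slim
    images = tf.placeholder(tf.float32, [None, 299, 299, 3])  # NHWC, values in [0, 1]
    with slim.arg_scope(inception_v4_arg_scope(weight_decay=0.0)):
        with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=False):
            _, end_points = inception_v4_base(images, final_endpoint='Mixed_5b')
    features = [end_points['Conv2d_2a_3x3'], end_points['Mixed_4a'], end_points['Mixed_5b']]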
src_single/input_pipeline_rand_mix.py (305 lines)

@@ -0,0 +1,305 @@
import os

import cv2
import numpy as np
import tensorflow as tf
from data_processing.tfrecord import *
from scipy import ndimage

from config import Config

# TODO Change to Dataset API
paired_dir_1 = '../CycleGAN_sketchy/training_data/sketchy_2'
paired_dir_2 = '../CycleGAN_sketchy/training_data/flickr_output_new'
paired_filenames_1 = [os.path.join(paired_dir_1, f) for f in os.listdir(paired_dir_1)
                      if os.path.isfile(os.path.join(paired_dir_1, f))]
paired_filenames_2 = [os.path.join(paired_dir_2, f) for f in os.listdir(paired_dir_2)
                      if os.path.isfile(os.path.join(paired_dir_2, f))]
print("paired file sketchy num: %d" % len(paired_filenames_1))
print("paired file flickr num: %d" % len(paired_filenames_2))

# build class map
class_mapping = []
classes_info = './data_processing/classes.csv'
classes = read_csv(classes_info)
classes_id = [item['Name'] for item in classes]
for name in paired_filenames_1:
    # e.g. '.../cat_coco_123.png' -> 'cat'
    name = os.path.splitext(os.path.split(name)[1])[0].split('_coco_')[0]
    class_id = classes_id.index(name)