论文链接:Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks

DCGAN

GAN的提出为使用神经网络进行图像生成打开了新世界的大门
而图像问题又与CNN有着很自然的关联
于是卷积与GAN的融合——DCGAN(Deep Convolutional GAN)便诞生了

DCGAN是第一个将CNN应用于GAN取得成功的模型
它的主要特点是

  • 将判别器中的池化下采样改为带步长的卷积,将生成器中的上采样改为分数步长卷积,也即反卷积
  • 在模型中引入batchnorm(但生成器最后一层和判别器第一层不加入)
  • 移除了隐藏层的全连接层
  • 在生成器除最后一层外使用ReLU激活,最后一层使用tanh激活
  • 在判别器中使用LeakyReLU激活(原始GAN的判别器中使用了maxout)

Keras实现

代码中使用的是mnist数据集

此处代码中判别器没有使用batchnorm,因为实验中发现加入后判别器会过强,进而导致训练崩溃(collapse)
这可能是因为MNIST数据集过于简单

代码如下:
from keras.datasets import mnist
from keras.models import Model, Sequential
from keras.layers import Dense, Flatten, BatchNormalization, Reshape, Input, Activation, \
Conv2D, Conv2DTranspose, Dropout
from keras.layers import LeakyReLU
from keras.optimizers import Adam
import numpy as np
import matplotlib.pyplot as plt

class DCGAN:
    """DCGAN for 28x28 single-channel MNIST digits, built with Keras.

    Attributes:
        img_shape: Shape of one image, channels last: (28, 28, 1).
        latent_dim: Length of the noise vector fed to the generator.
        generator: Model mapping noise to an image in [-1, 1] (tanh output).
        discriminator: Compiled model mapping an image to a real/fake
            probability (sigmoid output).
        combined: Compiled generator -> discriminator stack used to train
            the generator.
    """

    def __init__(self):
        self.img_shape = (28, 28, 1)
        self.latent_dim = 100

        self.generator = self.buildGenerator()
        self.discriminator = self.buildDiscriminator()

        # Named `noise_in` (not `input`) so the builtin is not shadowed.
        noise_in = Input(shape=(self.latent_dim,))
        img = self.generator(noise_in)

        # The discriminator is already compiled at this point. Setting
        # trainable=False afterwards is intended to keep it trainable through
        # its own train_on_batch while freezing its weights when the
        # generator is trained through `combined`.
        self.discriminator.trainable = False

        validity = self.discriminator(img)

        self.combined = Model(noise_in, validity)
        self.combined.compile(loss='binary_crossentropy', optimizer=Adam(2e-4))

    def buildGenerator(self):
        """Build the generator: noise vector -> 28x28x1 image with tanh output.

        Returns:
            An uncompiled Keras Model taking a (latent_dim,) input.
        """
        model = Sequential()

        # Project the noise onto a 7x7x256 feature map.
        model.add(Dense(input_dim=self.latent_dim, units=7 * 7 * 256))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Activation('relu'))
        model.add(Reshape((7, 7, 256)))

        # Two stride-2 transposed convolutions upsample 7 -> 14 -> 28; the
        # stride-1 layer refines features at full resolution.
        for filters, strides in ((128, 2), (64, 2), (32, 1)):
            model.add(Conv2DTranspose(filters=filters, kernel_size=3,
                                      strides=strides, padding='same'))
            model.add(BatchNormalization(momentum=0.8))
            model.add(Activation('relu'))

        # Output layer: single channel, no batch norm, tanh activation.
        model.add(Conv2DTranspose(filters=1, kernel_size=3, strides=1, padding='same'))
        model.add(Activation('tanh'))

        noise = Input(shape=(self.latent_dim,))
        img = model(noise)

        return Model(noise, img)

    def buildDiscriminator(self):
        """Build and compile the discriminator: image -> real/fake probability.

        No batch normalisation is used here; dropout follows every
        convolution instead.

        Returns:
            A Keras Model compiled with Adam(2e-4) and binary cross-entropy.
        """
        model = Sequential()

        # Strided convolutions replace pooling for downsampling.
        model.add(Conv2D(input_shape=self.img_shape, filters=64, kernel_size=3,
                         strides=2, padding='same'))
        model.add(LeakyReLU(0.2))
        model.add(Dropout(0.4))

        for filters in (128, 256):
            model.add(Conv2D(filters=filters, kernel_size=3, strides=2, padding='same'))
            model.add(LeakyReLU(0.2))
            model.add(Dropout(0.4))

        model.add(Flatten())
        model.add(Dense(1, activation='sigmoid'))

        img = Input(shape=self.img_shape)
        validity = model(img)

        discriminator = Model(img, validity)
        discriminator.compile(optimizer=Adam(2e-4), loss='binary_crossentropy')

        return discriminator

    def trainModel(self, epochs, batch_size=64, sample_interval=400):
        """Alternate discriminator and generator updates on MNIST.

        Args:
            epochs: Number of training iterations. Each iteration uses one
                randomly drawn batch, not a full pass over the data.
            batch_size: Number of real (and generated) images per iteration.
            sample_interval: A grid of generated digits is saved every this
                many iterations.
        """
        # Only the training images are needed; labels and test split are unused.
        (X_train, _), _ = mnist.load_data()

        # Scale pixels to [-1, 1] to match the generator's tanh output, and
        # add the trailing channel axis.
        X_train = X_train.astype(np.float32) / 127.5 - 1.0
        X_train = np.expand_dims(X_train, axis=3)

        # Target labels: all ones for real images, all zeros for generated ones.
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(1, epochs + 1):
            # Draw a random batch of real images (sampling with replacement).
            idx = np.random.randint(0, X_train.shape[0], batch_size)
            orgImg = X_train[idx]

            # Standard normal noise is the generator input.
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            genImg = self.generator.predict(noise, verbose=0)

            # Train the discriminator on a real batch, then a generated batch.
            D_loss_real = self.discriminator.train_on_batch(orgImg, valid)
            D_loss_fake = self.discriminator.train_on_batch(genImg, fake)
            D_loss = 0.5 * np.add(D_loss_real, D_loss_fake)

            # Train the generator: fresh noise, labelled as "real" so the
            # generator is rewarded for fooling the discriminator.
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            G_loss = self.combined.train_on_batch(noise, valid)

            print("{} --- D loss: {:.4f} , G loss: {:.4f}".format(epoch, D_loss, G_loss))

            if epoch % sample_interval == 0:
                self.saveImage(epoch)

    def saveImage(self, epoch, out_dir='generated'):
        """Save a 3x3 grid of freshly generated digits as `<out_dir>/<epoch>.png`.

        Args:
            epoch: Iteration number, used as the file name.
            out_dir: Output directory; created if it does not exist.
        """
        import os

        r, c = 3, 3
        noise = np.random.normal(0, 1, (r * c, self.latent_dim))
        genImgs = self.generator.predict(noise, verbose=0)

        # Rescale from the tanh range [-1, 1] to [0, 1] for display.
        genImgs = 0.5 * genImgs + 0.5

        fig, axs = plt.subplots(r, c)
        # axs.flat walks the grid row by row, matching the order of genImgs.
        for ax, picture in zip(axs.flat, genImgs):
            ax.imshow(picture[:, :, 0], cmap='gray')
            ax.axis('off')

        # Build the path portably and make sure the directory exists; a
        # hard-coded backslash only works as a separator on Windows.
        os.makedirs(out_dir, exist_ok=True)
        fig.savefig(os.path.join(out_dir, '%d.png' % epoch))
        # Close this specific figure so figures do not pile up in memory.
        plt.close(fig)

def main(epochs=8000, batch_size=64):
    """Build the Keras DCGAN and train it on MNIST.

    Args:
        epochs: Number of training iterations passed to DCGAN.trainModel.
        batch_size: Number of images per training iteration.
    """
    gan = DCGAN()
    gan.trainModel(epochs=epochs, batch_size=batch_size)


if __name__ == '__main__':
    main()

PyTorch实现

代码如下:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
import torch.utils.data as Data
from torch.autograd import Variable
import torchvision
from torchvision import transforms
import numpy as np
import matplotlib.pyplot as plt

class Generator(nn.Module):
    """DCGAN generator: latent vector -> 1x28x28 image with values in (0, 1).

    Args:
        input_shape: Length of the latent (noise) vector.
        output_shape: Shape of the generated image. It is stored but not
            used to size any layer; the network always emits 1x28x28.
    """

    # PyTorch updates running statistics as
    #     running = (1 - momentum) * running + momentum * batch_stat
    # which is the inverse of the Keras convention. A Keras momentum of 0.8
    # therefore corresponds to 0.2 here.
    _BN_MOMENTUM = 0.2

    def __init__(self, input_shape, output_shape):
        super().__init__()

        self.input_shape = input_shape
        self.output_shape = output_shape

        # Project the noise onto 256 feature maps of size 7x7. The ReLU
        # follows the DCGAN rule of using ReLU on every generator layer
        # except the output layer.
        self.dense = nn.Sequential(
            nn.Linear(self.input_shape, 256 * 7 * 7),
            nn.BatchNorm1d(256 * 7 * 7, momentum=self._BN_MOMENTUM),
            nn.ReLU(),
        )

        # stride=2 with padding=1 and output_padding=1 doubles the spatial
        # size exactly: 7 -> 14.
        self.conv1 = nn.Sequential(
            nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(128, momentum=self._BN_MOMENTUM),
            nn.ReLU(),
        )

        # 14 -> 28.
        self.conv2 = nn.Sequential(
            nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(64, momentum=self._BN_MOMENTUM),
            nn.ReLU(),
        )

        # Stride 1 with padding 1 keeps the 28x28 resolution.
        self.conv3 = nn.Sequential(
            nn.ConvTranspose2d(64, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32, momentum=self._BN_MOMENTUM),
            nn.ReLU(),
        )

        # Output layer: no batch norm, and a sigmoid so pixel values lie
        # in (0, 1).
        self.conv4 = nn.Sequential(
            nn.ConvTranspose2d(32, 1, kernel_size=3, padding=1),
            nn.Sigmoid(),
        )

    def forward(self, tensor_input):
        """Generate images from a batch of latent vectors.

        Args:
            tensor_input: Tensor of shape (batch, input_shape).

        Returns:
            Tensor of shape (batch, 1, 28, 28).
        """
        x = self.dense(tensor_input)
        # Unflatten the dense output into 256 feature maps of 7x7.
        x = x.reshape(-1, 256, 7, 7)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        output = self.conv4(x)
        return output

class Discriminator(nn.Module):
    """DCGAN discriminator: image batch -> probability that each image is real.

    Args:
        input_shape: Image shape as (channels, height, width). Only the
            channel count is read; the classifier head is sized for the
            7x7 feature maps produced from 28x28 inputs.
    """

    def __init__(self, input_shape):
        super().__init__()

        self.input_shape = input_shape

        def conv_stage(in_channels, out_channels, stride):
            # One feature stage: 3x3 convolution -> LeakyReLU(0.2) -> dropout.
            return nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1),
                nn.LeakyReLU(negative_slope=0.2),
                nn.Dropout2d(0.4),
            )

        # Two stride-2 stages downsample 28 -> 14 -> 7; the third keeps 7x7.
        self.conv1 = conv_stage(self.input_shape[0], 64, 2)
        self.conv2 = conv_stage(64, 128, 2)
        self.conv3 = conv_stage(128, 256, 1)

        # Classifier head: flatten, one hidden layer, sigmoid probability.
        head_layers = [
            nn.Flatten(),
            nn.Linear(256 * 7 * 7, 1024),
            nn.LeakyReLU(negative_slope=0.2),
            nn.Linear(1024, 1),
            nn.Sigmoid(),
        ]
        self.dense = nn.Sequential(*head_layers)

    def forward(self, img):
        """Score a batch of images.

        Args:
            img: Tensor of shape (batch, channels, 28, 28).

        Returns:
            Tensor of shape (batch, 1) with values in (0, 1).
        """
        features = img
        for stage in (self.conv1, self.conv2, self.conv3):
            features = stage(features)
        return self.dense(features)

class DCGAN():
    """Training harness pairing the Generator and Discriminator on MNIST.

    Attributes:
        cuda_on: True when a CUDA device is available.
        device: torch.device on which the models and batches are placed.
        input_shape: Length of the latent vector (100).
        img_shape: Image shape as (channels, height, width).
        generator, discriminator: The two networks.
        optim_G, optim_D: Adam optimizers (lr=2e-4), one per network.
        loss_adver: Binary cross-entropy adversarial loss.
    """

    def __init__(self):
        self.cuda_on = torch.cuda.is_available()
        self.device = torch.device('cuda' if self.cuda_on else 'cpu')

        self.input_shape = 100
        self.img_shape = (1, 28, 28)

        self.generator = Generator(self.input_shape, self.img_shape)
        self.discriminator = Discriminator(self.img_shape)

        self.optim_G = Adam(self.generator.parameters(), lr=2e-4)
        self.optim_D = Adam(self.discriminator.parameters(), lr=2e-4)
        self.loss_adver = nn.BCELoss()

        if self.cuda_on:
            self.generator.cuda()
            self.discriminator.cuda()
            self.loss_adver.cuda()

    def getDataloader(self, batch_size):
        """Return a shuffled DataLoader over the MNIST training split.

        Images are converted with ToTensor only, so pixel values lie in
        [0, 1].

        Args:
            batch_size: Number of images per batch.
        """
        mnist = torchvision.datasets.MNIST(
            root='./data/', train=True,
            # Fetch the dataset when it is not already under `root`, instead
            # of failing on the first run.
            download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
            ])
        )
        loader = Data.DataLoader(dataset=mnist, batch_size=batch_size, shuffle=True)
        return loader

    def train(self, epochs=1, batch_size=32):
        """Run adversarial training and periodically save generated samples.

        Every 400 steps the first nine generated images of the current batch
        are written to `gen/<epoch>_<step>.png` as a 3x3 grid.

        Args:
            epochs: Number of full passes over the MNIST training split.
            batch_size: Number of images per step.
        """
        import os

        loader = self.getDataloader(batch_size)

        # Create the sample directory up front and build paths portably; a
        # hard-coded backslash only works as a separator on Windows.
        sample_dir = 'gen'
        os.makedirs(sample_dir, exist_ok=True)

        for epoch in range(epochs):
            for step, (img_real, _) in enumerate(loader):
                # The final batch of an epoch may be smaller than batch_size.
                num = img_real.shape[0]

                valid = torch.ones((num, 1), dtype=torch.float32, device=self.device)
                fake = torch.zeros((num, 1), dtype=torch.float32, device=self.device)
                z = torch.randn(num, self.input_shape, device=self.device)
                img_real = img_real.to(self.device)

                img_gen = self.generator(z)

                # Train the discriminator. The generated batch is detached so
                # this backward pass stops at the discriminator: no wasted
                # generator gradients and no need for retain_graph.
                D_loss_real = self.loss_adver(self.discriminator(img_real), valid)
                D_loss_fake = self.loss_adver(self.discriminator(img_gen.detach()), fake)
                D_loss = (D_loss_real + D_loss_fake) / 2

                self.optim_D.zero_grad()
                D_loss.backward()
                self.optim_D.step()

                # Train the generator: the updated discriminator should
                # label the generated images as real.
                G_loss = self.loss_adver(self.discriminator(img_gen), valid)

                self.optim_G.zero_grad()
                G_loss.backward()
                self.optim_G.step()

                print('Epoch:', epoch+1, ' Step:', step, ' D_loss:', D_loss.item(), ' G_loss:', G_loss.item())

                if (step+1) % 400 == 0:
                    sample_path = os.path.join(sample_dir, '{}_{}.png'.format(epoch, step))
                    torchvision.utils.save_image(img_gen.detach()[:9], sample_path, nrow=3)

if __name__ == '__main__':
    # Script entry point: train the PyTorch DCGAN on MNIST for 10 epochs
    # with batches of 64 images.
    trainer = DCGAN()
    trainer.train(batch_size=64, epochs=10)