How to get rid of checkerboard artifacts

Stefan

Hello fellow coders,

I am using a fully convolutional autoencoder to colorize black-and-white images, but the output has a checkerboard pattern that I want to get rid of. The checkerboard artifacts I have seen elsewhere were much smaller than mine, and the usual way to remove them (so I have been told) is to replace all unpooling operations with bilinear upsampling.

However, I cannot simply replace the unpooling operations, because I work with images of varying sizes. I need the unpooling (with the saved indices and sizes), otherwise the output tensor might end up with a different size than the original.
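Side note for context: F.interpolate can also target an exact output size, so a bilinear variant could in principle reuse the saved encoder sizes. A minimal sketch with hypothetical shapes, not the actual model:

import torch
import torch.nn.functional as F

# Hypothetical decoder feature map and a size saved during encoding.
x = torch.randn(1, 512, 7, 7)
size4 = torch.Size([1, 512, 13, 13])

# interpolate() can hit the exact saved spatial size, whatever it is:
x = F.interpolate(x, size=size4[2:], mode='bilinear', align_corners=False)
print(x.shape)  # torch.Size([1, 512, 13, 13])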

TLDR:

How can I get rid of these checkerboard artifacts without replacing the unpooling operations?

import torch.nn as nn

class AE(nn.Module):
    def __init__(self):
        super(AE, self).__init__()
        self.leaky_reLU = nn.LeakyReLU(0.2)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=1, return_indices=True)
        self.unpool = nn.MaxUnpool2d(kernel_size=2, stride=2, padding=1)
        self.softmax = nn.Softmax2d()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=1)
        self.conv6 = nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv7 = nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.conv8 = nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv9 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv10 = nn.ConvTranspose2d(in_channels=64, out_channels=2, kernel_size=3, stride=1, padding=1)

    def forward(self, x):

        # encoder
        x = self.conv1(x)
        x = self.leaky_reLU(x)
        size1 = x.size()
        x, indices1 = self.pool(x)

        x = self.conv2(x)
        x = self.leaky_reLU(x)
        size2 = x.size()
        x, indices2 = self.pool(x)

        x = self.conv3(x)
        x = self.leaky_reLU(x)
        size3 = x.size()
        x, indices3 = self.pool(x)

        x = self.conv4(x)
        x = self.leaky_reLU(x)
        size4 = x.size()
        x, indices4 = self.pool(x)

        ###################### bottleneck
        x = self.conv5(x)
        x = self.leaky_reLU(x)

        x = self.conv6(x)
        x = self.leaky_reLU(x)
        ######################

        # decoder
        x = self.unpool(x, indices4, output_size=size4)
        x = self.conv7(x)
        x = self.leaky_reLU(x)

        x = self.unpool(x, indices3, output_size=size3)
        x = self.conv8(x)
        x = self.leaky_reLU(x)

        x = self.unpool(x, indices2, output_size=size2)
        x = self.conv9(x)
        x = self.leaky_reLU(x)

        x = self.unpool(x, indices1, output_size=size1)
        x = self.conv10(x)
        x = self.softmax(x)

        return x
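A quick sanity check, with a deliberately odd (hypothetical) input size, that the saved indices and output sizes really do restore the original spatial dimensions:

import torch

model = AE()
x = torch.randn(1, 3, 97, 129)   # odd height/width on purpose
out = model(x)
print(out.shape)                 # torch.Size([1, 2, 97, 129])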

[Image: neural network output]

EDIT - Solution:

Skip connections are the way to go!

import torch
import torch.nn as nn

class AE(nn.Module):
    def __init__(self):
        super(AE, self).__init__()
        self.leaky_reLU = nn.LeakyReLU(0.2)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=1, return_indices=True)
        self.unpool = nn.MaxUnpool2d(kernel_size=2, stride=2, padding=1)
        self.softmax = nn.Softmax2d()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=1)
        self.conv6 = nn.Conv2d(in_channels=1024, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv7 = nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.conv8 = nn.Conv2d(in_channels=512, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv9 = nn.Conv2d(in_channels=256, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv10 = nn.Conv2d(in_channels=128, out_channels=2, kernel_size=3, stride=1, padding=1)

    def forward(self, x):

        # encoder
        x = self.conv1(x)
        out1 = self.leaky_reLU(x)
        x = out1
        size1 = x.size()
        x, indices1 = self.pool(x)

        x = self.conv2(x)
        out2 = self.leaky_reLU(x)
        x = out2
        size2 = x.size()
        x, indices2 = self.pool(x)

        x = self.conv3(x)
        out3 = self.leaky_reLU(x)
        x = out3
        size3 = x.size()
        x, indices3 = self.pool(x)

        x = self.conv4(x)
        out4 = self.leaky_reLU(x)
        x = out4
        size4 = x.size()
        x, indices4 = self.pool(x)

        ###################### bottleneck
        x = self.conv5(x)
        x = self.leaky_reLU(x)

        x = self.conv6(x)
        x = self.leaky_reLU(x)
        ######################

        # decoder
        x = self.unpool(x, indices4, output_size=size4)
        x = self.conv7(torch.cat((x, out4), 1))
        x = self.leaky_reLU(x)

        x = self.unpool(x, indices3, output_size=size3)
        x = self.conv8(torch.cat((x, out3), 1))
        x = self.leaky_reLU(x)

        x = self.unpool(x, indices2, output_size=size2)
        x = self.conv9(torch.cat((x, out2), 1))
        x = self.leaky_reLU(x)

        x = self.unpool(x, indices1, output_size=size1)
        x = self.conv10(torch.cat((x, out1), 1))
        x = self.softmax(x)

        return x
Kaushik Roy

Skip connections are commonly used in encoder-decoder architectures and help produce accurate results by passing appearance information from the shallow layers of the encoder to the corresponding deeper layers of the decoder. Unet is the most widely used encoder-decoder architecture of this kind. Linknet is also very popular; it differs from Unet in how the appearance information of an encoder layer is fused with the corresponding decoder layer. In Unet, the incoming encoder features are concatenated with the corresponding decoder layer. Linknet, on the other hand, performs addition, which is why Linknet needs fewer operations in a single forward pass and is significantly faster than Unet.
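The difference between the two fusion styles comes down to one line; a minimal sketch with hypothetical shapes:

import torch

# Encoder skip feature and an upsampled decoder feature of matching size.
enc = torch.randn(1, 256, 32, 32)
dec = torch.randn(1, 256, 32, 32)

unet_fused = torch.cat((dec, enc), dim=1)  # Unet: concatenate -> (1, 512, 32, 32)
linknet_fused = dec + enc                  # Linknet: add -> (1, 256, 32, 32)

Concatenation doubles the channel count, which is why conv7 through conv10 in the solution above take twice as many input channels; addition leaves the channel count, and hence the per-layer cost, unchanged.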

Each convolution block in your decoder might look like the following:

[Image: decoder convolution block]
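Since the image did not survive, here is a minimal sketch of such a block, assuming the same unpool / conv / activation modules as in the solution code above:

import torch

# One Unet-style decoder block: unpool with the saved indices,
# concatenate the encoder skip along the channel dim, then conv + activation.
def decoder_block(x, skip, indices, size, unpool, conv, act):
    x = unpool(x, indices, output_size=size)  # restore the encoder's spatial size
    x = torch.cat((x, skip), dim=1)           # Unet-style skip fusion
    return act(conv(x))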

I have also attached a diagram below illustrating the Unet and LinkNet architectures. I hope using skip connections helps.

[Image: Unet and LinkNet architectures]
