Error message:
Traceback (most recent call last):
  File "main.py", line 48, in <module>
    training_model.train()
  File "/home/usr/longtail/run_networks.py", line 205, in train
    self.batch_loss(labels)
  File "/home/usr/longtail/run_networks.py", line 148, in batch_loss
    self.loss_perf = self.criterions['PerformanceLoss'](self.logits, labels) \
  File "/home/usr/.pyenv/versions/py36torch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/usr/.pyenv/versions/py36torch/lib/python3.6/site-packages/torch/nn/modules/loss.py", line 948, in forward
    ignore_index=self.ignore_index, reduction=self.reduction)
  File "/home/usr/.pyenv/versions/py36torch/lib/python3.6/site-packages/torch/nn/functional.py", line 2422, in cross_entropy
    return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
  File "/home/usr/.pyenv/versions/py36torch/lib/python3.6/site-packages/torch/nn/functional.py", line 2216, in nll_loss
    .format(input.size(0), target.size(0)))
ValueError: Expected input batch_size (159) to match target batch_size (160).

References:
https://discuss.pytorch.org/t/valueerror-expected-input-batch-size-324-to-match-target-batch-size-4/24498/2
https://blog.csdn.net/weixin_41513917/article/details/104655556
Following the fix suggested there, the first step is to print the shapes of the fully connected layer's input and output tensors and check whether the corresponding sizes actually match.
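A minimal sketch of this shape check, assuming the classifier is a plain nn.Linear; the class and attribute names here are hypothetical, and the dimensions follow the numbers reported below:

import torch
import torch.nn as nn

class Classifier(nn.Module):  # hypothetical name
    def __init__(self, feature_dim=690, num_classes=53):
        super().__init__()
        self.fc = nn.Linear(feature_dim, num_classes)  # weight shape: [53, 690]

    def forward(self, feature):
        # Print shapes so a batch-size mismatch is visible at a glance.
        print('fc input :', tuple(feature.size()))   # expect (160, 690)
        logits = self.fc(feature)
        print('fc output:', tuple(logits.size()))    # expect (160, 53)
        return logits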
Before I had located the root cause, I switched to a single GPU, and the code ran normally.
With two GPUs, the logits come out with size [159, 53]. The fully connected layer is configured as [53, 690] (53 output classes, 690 input features), so its input feature should normally have size [160, 690]. However, after switching back to two GPUs, the fully connected layer's input feature has size [159, 690].
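For context (my assumption about the training setup): nn.DataParallel scatters each tensor input along dim 0, runs one replica per GPU, and gathers the per-replica outputs back together, so shapes printed inside forward are per-replica, while the gathered logits should still be [160, 53]. A minimal sketch of that behavior with illustrative sizes (needs two visible GPUs):

import torch
import torch.nn as nn

class Probe(nn.Module):
    def forward(self, x):
        # Printed once per replica: [80, 690] on each of the two GPUs.
        print('replica input:', tuple(x.size()))
        return x

model = nn.DataParallel(Probe().cuda(), device_ids=[0, 1])
out = model(torch.randn(160, 690).cuda())
print('gathered output:', tuple(out.size()))  # (160, 690)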
So why does this happen? Trace back to the feature layer: this layer probably drops one sample during the computation.
def forward(self, flag, input_word, input_pos1, input_pos2, real_pos1, real_pos2,
            input_mask, input_scope, input_type=None, input_label=None, label_for_sen=None):
    embeddings = self.embedding_layer(input_word, input_pos1, input_pos2,
                                      real_pos1, real_pos2, input_type)
    embeddings.unsqueeze_(dim=1)
    x = self.cnn(embeddings)       # [batch_size, out_channel, num_step, 1]
    input_mask.unsqueeze_(dim=1)   # [batch_size, 1, num_step, 3]
    x, _ = torch.max(input_mask + x, dim=2)
    x = x - 100
    x = x.view(-1, self.opt['feature_dim'])
    encoder_out = self.dropout(self.activation(x))

    tower_repre = []
    # One bag representation per scope interval: input_scope holds the
    # boundary offsets of each bag, so this loop runs len(input_scope) - 1 times.
    for i in range(len(input_scope) - 1):
        sen_matrix = encoder_out[input_scope[i] : input_scope[i + 1]]
        size = sen_matrix.size()[0]
        if flag == 'train':
            cluster_att = self._train_cluster_weight(sen_matrix, input_label[i], size)
            final_repre = torch.squeeze(torch.matmul(cluster_att, sen_matrix))
        else:
            cluster_att = self._test_cluster_weight(sen_matrix, size).t()  # [53, feature]
            # print(cluster_att[input_label[i]], input_label[i])
            final_repre = torch.matmul(cluster_att, sen_matrix)
        tower_repre.append(final_repre)
    bag_repres = torch.stack(tower_repre)  # if test, [batch, 53, feature], else [batch, feature]
    return bag_repres
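A plausible explanation, assuming the model is wrapped in nn.DataParallel and input_scope is passed as a tensor (so it gets scattered along dim 0 like every other input): input_scope holds batch_size + 1 boundary offsets, not batch_size rows, and the loop above yields len(input_scope) - 1 bags per replica. With 160 bags, the 161 offsets are split into chunks of 81 and 80, so the two replicas together produce (81 - 1) + (80 - 1) = 159 bags, which matches the [159, 690] feature and [159, 53] logits exactly. A toy sketch of the arithmetic:

import torch

# 160 bags -> 161 boundary offsets (illustrative values).
input_scope = torch.arange(161)

# DataParallel scatters tensor arguments along dim 0, roughly like torch.chunk.
chunks = torch.chunk(input_scope, 2, dim=0)      # lengths 81 and 80
bags_per_replica = [len(c) - 1 for c in chunks]  # the loop runs len(scope) - 1 times
print(bags_per_replica, sum(bags_per_replica))   # [80, 79] 159: one bag is lost

(Under this assumption, the offsets in the second chunk would also no longer index the replica's local encoder_out, so even the bags it does build would be misaligned.)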