Cause
A custom class that inherits from nn.Module was used, but super(XXX, self).__init__() was never called inside the subclass's __init__ method.
Incorrect example
import torch
import torch.nn as nn
import numpy as np


class SelfAttention(nn.Module):
    """ Self-Attention """

    def __init__(self, n_head, d_k, d_v, d_x, d_o):
        # super(SelfAttention, self).__init__()  # SelfAttention is the class name; the required call is missing here
        self.wq = nn.Parameter(torch.Tensor(d_x, d_k))
        self.wk = nn.Parameter(torch.Tensor(d_x, d_k))
        self.wv = nn.Parameter(torch.Tensor(d_x, d_v))
        # MultiHeadAttention is assumed to be defined elsewhere in the original code
        self.mha = MultiHeadAttention(n_head=n_head, d_k_=d_k, d_v_=d_v, d_k=d_k, d_v=d_v, d_o=d_o)
        self.init_parameters()

    def init_parameters(self):
        # initialize every registered parameter uniformly in [-stdv, stdv]
        for param in self.parameters():
            stdv = 1. / np.power(param.size(-1), 0.5)
            param.data.uniform_(-stdv, stdv)

    def forward(self, x, mask=None):
        q = torch.matmul(x, self.wq)
        k = torch.matmul(x, self.wk)
        v = torch.matmul(x, self.wv)
        attn, output = self.mha(q, k, v, mask=mask)
        return attn, output


if __name__ == "__main__":
    n_x = 4
    d_x = 80
    batch = 2

    x = torch.randn(batch, n_x, d_x)
    mask = torch.zeros(batch, n_x, n_x).bool()

    selfattn = SelfAttention(n_head=8, d_k=128, d_v=64, d_x=80, d_o=80)
    attn, output = selfattn(x, mask=mask)
    print(attn.size())
    print(output.size())
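With the super() call commented out, this snippet fails at the very first nn.Parameter assignment, typically with AttributeError: cannot assign parameters before Module.__init__() call. The reason is that nn.Module.__init__() is what creates the internal registries (such as _parameters and _modules) that attribute assignment on an nn.Module relies on.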
Correct example
import torch
import torch.nn as nn
import numpy as np


class SelfAttention(nn.Module):
    """ Self-Attention """

    def __init__(self, n_head, d_k, d_v, d_x, d_o):
        super(SelfAttention, self).__init__()  # SelfAttention is the class name
        self.wq = nn.Parameter(torch.Tensor(d_x, d_k))
        self.wk = nn.Parameter(torch.Tensor(d_x, d_k))
        self.wv = nn.Parameter(torch.Tensor(d_x, d_v))
        # MultiHeadAttention is assumed to be defined elsewhere in the original code
        self.mha = MultiHeadAttention(n_head=n_head, d_k_=d_k, d_v_=d_v, d_k=d_k, d_v=d_v, d_o=d_o)
        self.init_parameters()

    def init_parameters(self):
        # initialize every registered parameter uniformly in [-stdv, stdv]
        for param in self.parameters():
            stdv = 1. / np.power(param.size(-1), 0.5)
            param.data.uniform_(-stdv, stdv)

    def forward(self, x, mask=None):
        q = torch.matmul(x, self.wq)
        k = torch.matmul(x, self.wk)
        v = torch.matmul(x, self.wv)
        attn, output = self.mha(q, k, v, mask=mask)
        return attn, output


if __name__ == "__main__":
    n_x = 4
    d_x = 80
    batch = 2

    x = torch.randn(batch, n_x, d_x)
    mask = torch.zeros(batch, n_x, n_x).bool()

    selfattn = SelfAttention(n_head=8, d_k=128, d_v=64, d_x=80, d_o=80)
    attn, output = selfattn(x, mask=mask)
    print(attn.size())
    print(output.size())
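Note: on Python 3 the class name does not need to be spelled out; a bare super() call inside __init__ is equivalent and harder to get wrong. A minimal sketch, reusing the class above:

class SelfAttention(nn.Module):
    def __init__(self, n_head, d_k, d_v, d_x, d_o):
        super().__init__()  # same effect as super(SelfAttention, self).__init__()
        ...  # rest of __init__ unchanged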