I'm working on a VQA model, and I need some help as I'm new to this.
I want to use transfer learning from the VGG19 network ahead of training, so that the image features are already computed when training starts (I'm trying to solve a performance issue).
Is it possible to do so? If so, can someone please share an example with PyTorch?
Below is the relevant code:
class img_CNN(nn.Module):
    """Image encoder: pretrained VGG19 without its last classifier layer,
    followed by a linear projection to ``img_size``.

    Args:
        img_size: Output width of the final linear projection.
    """

    def __init__(self, img_size):
        super(img_CNN, self).__init__()
        self.model = models.vgg19(pretrained=True)
        # Input width of VGG19's last classifier layer; this is what ``fc``
        # below consumes once that layer has been dropped.
        self.in_features = self.model.classifier[-1].in_features
        # Remove VGG19's last classifier layer, keeping every layer before it.
        self.model.classifier = nn.Sequential(
            *list(self.model.classifier.children())[:-1]
        )
        # Fix: the original passed a bare ``in_features`` here, which is not
        # defined in this scope; the value is stored on ``self``.
        self.fc = nn.Linear(self.in_features, img_size)

    def forward(self, image):
        """Run the truncated VGG19 and project its output with ``fc``.

        Args:
            image: Batch of input images; presumably
                (batch, channel, height, width) -- TODO confirm with caller.

        Returns:
            The output of ``self.fc`` applied to the backbone features.
        """
        # NOTE: no ``torch.no_grad()`` here, so gradients also flow into the
        # VGG19 weights during training.
        backbone_features = self.model(image)
        return self.fc(backbone_features)
class vqamodel(nn.Module):
    """VQA model that fuses an image embedding and a question embedding.

    The two embeddings are multiplied element-wise, passed through a linear
    layer of width ``answer_len`` and then through ReLU.

    Args:
        output_dim, input_dim, emb_dim, hid_dim, n_layers, dropout, que_size:
            Forwarded to ``question_lstm`` (``dropout`` also configures
            ``self.dropout``; ``que_size`` is also the input width of ``fc1``).
        answer_len: Output width of ``fc1``.
        img_size: Forwarded to ``img_CNN``.
        model_vgg: Accepted but not used by this class.
        in_features: Accepted but not used by this class.
    """

    def __init__(
        self,
        output_dim,
        input_dim,
        emb_dim,
        hid_dim,
        n_layers,
        dropout,
        answer_len,
        que_size,
        img_size,
        model_vgg,
        in_features,
    ):
        super().__init__()

        # Sub-encoders for the two modalities.
        self.image = img_CNN(img_size)
        self.question = question_lstm(
            input_dim, emb_dim, hid_dim, n_layers, dropout, output_dim, que_size
        )

        # Activation / regularisation modules (only ``relu`` is used in
        # ``forward``; the others are kept as attributes).
        self.tanh = nn.Tanh()
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

        # The linear layer takes the combined vector as input.
        # NOTE(review): the element-wise product in ``forward`` presumably
        # requires the image embedding to have the same width as ``que_size``
        # -- confirm.
        self.fc1 = nn.Linear(que_size, answer_len)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, image, question):
        """Encode both inputs, fuse them, and return ReLU(fc1(fused))."""
        img_vec = self.image(image)
        que_vec = self.question(question)
        fused = que_vec * img_vec
        return self.relu(self.fc1(fused))
How can I take models.vgg19(pretrained=True) out of the model, run it on the image dataloader before training, and save the image representations in a NumPy array?
Thank you!