import torch
import torch.nn as nn
import torch.optim as optim
from torchtext import data          # requires an older torchtext (< 0.9) or torchtext.legacy.data
from gensim.corpora import WikiCorpus
from transformers import GPT2Tokenizer, GPT2Model
from functions import *             # local helper module, assumed to provide evaluate()
import itertools

# Define the hyperparameters
num_layers = 2
batch_size = 32
hidden_dim = 256

# Load the GPT-2 tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token   # GPT-2 has no pad token by default
model = GPT2Model.from_pretrained('gpt2')

# Load the data
wiki_corpus = WikiCorpus('enwiki-latest-pages-articles.xml.bz2')
# Keep the raw text column; tokenization is done by the GPT-2 tokenizer below
TEXT = data.RawField()
stackoverflow_corpus = data.TabularDataset(
    'stackoverflow.csv', format='csv', fields=[('text', TEXT)]
)

# Preprocess the data: WikiCorpus yields lists of tokens, TabularDataset yields Examples.
# Only as many Wikipedia articles are taken as there are placeholder labels below.
wiki_data = [' '.join(tokens) for tokens in itertools.islice(wiki_corpus.get_texts(), 10)]
stackoverflow_data = [example.text for example in stackoverflow_corpus]

# Convert the data to a format compatible with PyTorch by tokenizing into input IDs
wiki_data = tokenizer(wiki_data, return_tensors='pt', padding=True, truncation=True).input_ids
stackoverflow_data = tokenizer(stackoverflow_data, return_tensors='pt', padding=True, truncation=True).input_ids

# The binary labels below imply a classification task, so a small linear head is
# placed on top of GPT-2's hidden states (GPT2Model itself has no classification head)
classifier = nn.Linear(model.config.hidden_size, 2)

# Define the Adam optimizer over both the backbone and the classification head
optimizer = optim.Adam(list(model.parameters()) + list(classifier.parameters()), lr=0.001)

# Define the loss function
criterion = nn.CrossEntropyLoss()

# Train the model
num_epochs = 10
# Placeholder labels: one binary label per Wikipedia example
labels = torch.tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 1])


def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs."""
    lr = 0.001 * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


for epoch in range(num_epochs):
    # Forward pass: GPT2Model takes a single batch of input IDs, so only the
    # Wikipedia batch is fed here (its size must match the number of labels)
    hidden_states = model(wiki_data).last_hidden_state
    # Classify each example from the hidden state of its final token
    outputs = classifier(hidden_states[:, -1, :])
    # Calculate the loss
    loss = criterion(outputs, labels)
    # Backward pass
    loss.backward()
    # Update the parameters
    optimizer.step()
    # Reset the gradients
    optimizer.zero_grad()
    # Evaluate the model (evaluate() comes from the local functions module)
    accuracy = evaluate(model, wiki_data)
    # Save the model weights and states
    torch.save(model.state_dict(), 'model.pth')
    # Adjust the learning rate
    adjust_learning_rate(optimizer, epoch)


# Define the model: pretrained GPT-2 wrapped between a trainable embedding,
# an LSTM, and a projection back to the vocabulary
class GPT(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers):
        super().__init__()
        # embedding_dim must equal GPT-2's hidden size (768) so the embeddings
        # can be passed to the pretrained model via inputs_embeds
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)
        self.gpt2 = model

    def forward(self, x):
        # Embed the input token IDs
        x = self.embedding(x)
        # Pass the embeddings through the GPT-2 model
        x = self.gpt2(inputs_embeds=x).last_hidden_state
        # Pass through the LSTM
        x, _ = self.lstm(x)
        # Project each position back to the vocabulary
        x = self.fc(x)
        return x
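

# Usage sketch (an illustration, not part of the original script): instantiate the
# GPT wrapper defined above and run one forward pass on a tokenized sentence.
# The vocabulary size is taken from the GPT-2 tokenizer, and embedding_dim is set
# to GPT-2's hidden size so that inputs_embeds is valid; the sample sentence is
# purely hypothetical.
gpt = GPT(
    vocab_size=len(tokenizer),
    embedding_dim=model.config.hidden_size,  # 768 for the base GPT-2 checkpoint
    hidden_dim=hidden_dim,
    num_layers=num_layers,
)
sample = tokenizer("How do I sort a list in Python?", return_tensors='pt').input_ids
with torch.no_grad():
    logits = gpt(sample)   # shape: (1, sequence_length, vocab_size)
print(logits.shape)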