forked from librezo/bog
first
commit 3126f289ff
@@ -0,0 +1,2 @@
__pycache__
File diff suppressed because one or more lines are too long
@@ -0,0 +1,86 @@
import torch
import numpy as np
import scipy.ndimage  # import the submodule explicitly; `import scipy` alone does not guarantee it

# Define the hyperparameters
num_layers = 2
batch_size = 32
hidden_dim = 256

def random_rotation(inputs):
    angle = np.random.uniform(-180, 180)
    inputs = scipy.ndimage.rotate(inputs, angle, reshape=False)
    return inputs

def random_scaling(inputs):
    scale = np.random.uniform(0.8, 1.2)
    inputs = scipy.ndimage.zoom(inputs, scale)
    return inputs

def random_translation(inputs):
    shift = np.random.uniform(-0.2, 0.2)
    inputs = scipy.ndimage.shift(inputs, shift)
    return inputs

def random_shearing(inputs):
    shear = np.random.uniform(-0.2, 0.2)
    # scipy.ndimage has no shear(); express the shear as a 2x2 affine matrix
    matrix = np.array([[1.0, shear], [0.0, 1.0]])
    inputs = scipy.ndimage.affine_transform(inputs, matrix)
    return inputs

def random_flipping(inputs):
    # scipy.ndimage has no flip(); use numpy (copy() yields contiguous memory)
    inputs = np.flip(inputs, axis=1).copy()
    return inputs

def data_augmentation(inputs):
    # Apply random rotation
    inputs = random_rotation(inputs)
    # Apply random scaling
    inputs = random_scaling(inputs)
    # Apply random translation
    inputs = random_translation(inputs)
    # Apply random shearing
    inputs = random_shearing(inputs)
    # Apply random flipping
    inputs = random_flipping(inputs)
    return inputs

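A quick sanity check of the pipeline on a dummy image (a hypothetical 28x28 array, not part of the commit); note the output shape can differ from the input because zoom rescales the array:

sample = np.random.rand(28, 28)             # stand-in for one grayscale image
augmented = data_augmentation(sample)
print(sample.shape, '->', augmented.shape)  # zoom may change the shape
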
def evaluate(model, test_data, hyperparameters=None, recurrent_network=False, pre_trained_model=False, fine_tuning=False):
    # Use GPU for evaluation if available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Define the hidden state
    hidden = (torch.zeros(num_layers, batch_size, hidden_dim).to(device),
              torch.zeros(num_layers, batch_size, hidden_dim).to(device))
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for data in test_data:
            inputs, labels = data
            # Use data augmentation (the scipy helpers operate on numpy arrays)
            inputs = torch.as_tensor(data_augmentation(inputs.numpy()), dtype=inputs.dtype)
            # Move the batch to the device
            inputs = inputs.to(device)
            labels = labels.to(device)
            # Use recurrent network
            if recurrent_network:
                outputs = model(inputs, hidden)
            else:
                outputs = model(inputs)
            # Use pre-trained model
            if pre_trained_model:
                outputs = model.forward_from_pretrained(inputs)
            # Use fine-tuning
            if fine_tuning:
                outputs = model.fine_tune(inputs, hyperparameters)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy


def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = 0.001 * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
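The step decay is easy to verify against the formula with a throwaway optimizer:

import torch.optim as optim

opt = optim.Adam([torch.nn.Parameter(torch.zeros(1))], lr=0.001)
for epoch in (0, 29, 30, 60, 90):
    adjust_learning_rate(opt, epoch)
    print(epoch, opt.param_groups[0]['lr'])  # lr drops 10x at epochs 30, 60, 90
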
@@ -0,0 +1,83 @@
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext import data
from gensim.corpora import WikiCorpus
from transformers import GPT2Tokenizer, GPT2Model
from functions import *

# Define the hyperparameters
num_layers = 2
batch_size = 32
hidden_dim = 256

# Load the GPT2 model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2Model.from_pretrained('gpt2')

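For reference, a minimal sketch of how the tokenizer/model pair just loaded is actually invoked in the transformers API (the example sentence is arbitrary):

enc = tokenizer("Hello world", return_tensors='pt')  # input_ids + attention_mask
with torch.no_grad():
    out = model(**enc)
print(out.last_hidden_state.shape)  # (1, seq_len, 768) for the base gpt2
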
# Load the data
wiki_corpus = WikiCorpus('enwiki-latest-pages-articles.xml.bz2')
stackoverflow_corpus = data.TabularDataset('stackoverflow.csv', format='csv', fields=['text'])

# Preprocess the data
# (note: WikiCorpus yields lists of word tokens and TabularDataset yields
# Example objects, so both still need encoding before use)
wiki_data = [text for text in wiki_corpus]
stackoverflow_data = [text for text in stackoverflow_corpus]

# Convert the data to a format compatible with PyTorch
# (this assumes the entries are already equal-length sequences of token ids;
# torch.tensor() cannot build a tensor from lists of strings)
wiki_data = torch.tensor(wiki_data)
stackoverflow_data = torch.tensor(stackoverflow_data)

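For the torch.tensor() calls above to succeed, each document first has to become an equal-length sequence of token ids; a minimal sketch using the already-loaded tokenizer (the 128-token cap and EOS-as-padding are assumptions, not part of the original script):

# GPT2 ships without a pad token, so reuse EOS for padding
tokenizer.pad_token = tokenizer.eos_token

def encode_corpus(token_lists, max_len=128):
    ids = [tokenizer.encode(' '.join(tokens),
                            max_length=max_len,
                            truncation=True,
                            padding='max_length')
           for tokens in token_lists]
    return torch.tensor(ids)

# wiki_data = encode_corpus(wiki_data)
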
# Define the Adam optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Define the loss function
criterion = nn.CrossEntropyLoss()

# Train the model
num_epochs = 10
# Placeholder labels for the ten training examples
labels = torch.tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 1])

# (adjust_learning_rate is already provided by the wildcard import from functions)

for epoch in range(num_epochs):
    # Forward pass
    # (GPT2Model accepts a single input_ids tensor; a second positional
    # argument would be read as past_key_values, so combine the corpora)
    outputs = model(torch.cat([wiki_data, stackoverflow_data]))
    # Calculate the loss
    # (assumes outputs are per-example class logits matching the labels above;
    # a raw GPT2Model actually returns hidden states, so a classification
    # head is still needed here)
    loss = criterion(outputs, labels)
    # Backward pass
    loss.backward()
    # Update the parameters
    optimizer.step()
    # Reset the gradients
    optimizer.zero_grad()
    # Evaluate the model
    # (evaluate expects an iterable of (inputs, labels) batches, e.g. a DataLoader)
    accuracy = evaluate(model, wiki_data)
    # Save the model weights and states (overwritten every epoch)
    torch.save(model.state_dict(), 'model.pth')
    # Adjust the learning rate
    adjust_learning_rate(optimizer, epoch)

# Define the model
class GPT(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)
        self.gpt2 = model  # the pre-trained GPT2Model loaded above

    def forward(self, x):
        # Embed the input
        x = self.embedding(x)
        # Pass through the GPT2 model
        # (embeddings must go in via inputs_embeds, and embedding_dim has to
        # match GPT2's hidden size of 768 for the shapes to line up)
        x = self.gpt2(inputs_embeds=x).last_hidden_state
        # Pass through the LSTM
        x, _ = self.lstm(x)
        # Pass through the fully connected layer
        x = self.fc(x)
        return x
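A minimal smoke test of the class (embedding_dim must equal GPT2's 768-wide hidden size for the inputs_embeds hand-off to work; vocab size taken from the tokenizer):

net = GPT(vocab_size=len(tokenizer), embedding_dim=768, hidden_dim=256, num_layers=2)
dummy_ids = torch.randint(0, len(tokenizer), (1, 16))  # one sequence of 16 tokens
logits = net(dummy_ids)
print(logits.shape)  # (1, 16, vocab_size)
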
@@ -0,0 +1,77 @@
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext import data
from gensim.corpora import WikiCorpus
from transformers import GPT2Tokenizer, GPT2Model
from functions import *

# Define the model
class GPT(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)
        self.gpt2 = model  # the GPT2Model loaded below (the global resolves at call time)

    def forward(self, x):
        # Embed the input
        x = self.embedding(x)
        # Pass through the GPT2 model
        # (embeddings must go in via inputs_embeds, and embedding_dim has to
        # match GPT2's hidden size of 768 for the shapes to line up)
        x = self.gpt2(inputs_embeds=x).last_hidden_state
        # Pass through the LSTM
        x, _ = self.lstm(x)
        # Pass through the fully connected layer
        x = self.fc(x)
        return x

# Load the GPT2 model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2Model.from_pretrained('gpt2')

# Load the data
wiki_corpus_en = WikiCorpus('data/enwiki-latest-pages-articles.xml.bz2')
wiki_corpus_fr = WikiCorpus('data/frwiki-latest-pages-articles.xml.bz2')
# stackoverflow_corpus = data.TabularDataset('data/stackoverflow.csv', format='csv', fields=['text'])

# Preprocess the data (WikiCorpus yields lists of word tokens)
wiki_data_en = [text for text in wiki_corpus_en]
wiki_data_fr = [text for text in wiki_corpus_fr]
# stackoverflow_data = [text for text in stackoverflow_corpus]

# Convert the data to a format compatible with PyTorch
# (as in the previous script, this assumes equal-length sequences of token
# ids; the raw token lists must be encoded first)
wiki_data_en = torch.tensor(wiki_data_en)
wiki_data_fr = torch.tensor(wiki_data_fr)
# stackoverflow_data = torch.tensor(stackoverflow_data)

# Define the Adam optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Define the loss function
criterion = nn.CrossEntropyLoss()

# Train the model
num_epochs = 10
# Placeholder labels for the ten training examples
labels = torch.tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 1])

for epoch in range(num_epochs):
    # Forward pass
    # outputs = model(wiki_data, stackoverflow_data)
    # (GPT2Model accepts a single input_ids tensor; a second positional
    # argument would be read as past_key_values, so combine the corpora)
    outputs = model(torch.cat([wiki_data_en, wiki_data_fr]))
    # Calculate the loss
    # (assumes outputs are per-example class logits matching the labels above;
    # a raw GPT2Model returns hidden states, so a classification head is still needed)
    loss = criterion(outputs, labels)
    # Backward pass
    loss.backward()
    # Update the parameters
    optimizer.step()
    # Reset the gradients
    optimizer.zero_grad()
    # Evaluate the model
    # (evaluate expects an iterable of (inputs, labels) batches)
    accuracy = evaluate(model, wiki_data_en)
    # Save the model weights and states (overwritten every epoch)
    torch.save(model.state_dict(), 'model.pth')
    # Adjust the learning rate
    adjust_learning_rate(optimizer, epoch)
    # Print the loss and accuracy
    print('Epoch: {}, Loss: {:.4f}, Accuracy: {:.4f}'.format(epoch + 1, loss.item(), accuracy))
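evaluate() iterates over (inputs, labels) batches, so in both training scripts the tensors would need to be wrapped before the call; a sketch, assuming the first dimension of wiki_data_en matches the number of labels:

from torch.utils.data import TensorDataset, DataLoader

test_loader = DataLoader(TensorDataset(wiki_data_en, labels), batch_size=batch_size)
# accuracy = evaluate(model, test_loader)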