This commit is contained in:
poka 2023-01-04 04:02:19 +01:00
commit 3126f289ff
5 changed files with 49225 additions and 0 deletions

2
.gitignore vendored Normal file

@@ -0,0 +1,2 @@
__pycache__

48977
data/stackoverflow.csv Normal file

File diff suppressed because one or more lines are too long

86
functions.py Normal file

@@ -0,0 +1,86 @@
import torch
import numpy as np
import scipy
# Define the hyperparameters
num_layers = 2
batch_size = 32
hidden_dim = 256
def random_rotation(inputs):
    angle = np.random.uniform(-180, 180)
    inputs = scipy.ndimage.rotate(inputs, angle, reshape=False)
    return inputs
def random_scaling(inputs):
    scale = np.random.uniform(0.8, 1.2)
    inputs = scipy.ndimage.zoom(inputs, scale)
    return inputs
def random_translation(inputs):
    shift = np.random.uniform(-0.2, 0.2)
    inputs = scipy.ndimage.shift(inputs, shift)
    return inputs
def random_shearing(inputs):
    shear = np.random.uniform(-0.2, 0.2)
    # scipy.ndimage has no shear function; express the shear as an affine transform
    # over the last two axes (works for single images and for batches)
    matrix = np.eye(inputs.ndim)
    matrix[-2, -1] = shear
    inputs = scipy.ndimage.affine_transform(inputs, matrix)
    return inputs
def random_flipping(inputs):
    # scipy.ndimage has no flip function; use numpy (copy to drop the negative-stride view)
    inputs = np.flip(inputs, axis=1).copy()
    return inputs
def data_augmentation(inputs):
    # Apply random rotation
    inputs = random_rotation(inputs)
    # Apply random scaling
    inputs = random_scaling(inputs)
    # Apply random translation
    inputs = random_translation(inputs)
    # Apply random shearing
    inputs = random_shearing(inputs)
    # Apply random flipping
    inputs = random_flipping(inputs)
    return inputs
def evaluate(model, test_data, hyperparameters=None, recurrent_network=False, pre_trained_model=False, fine_tuning=False):
    # Use GPU for evaluation if available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Define the initial hidden state for the recurrent case
    hidden = (torch.zeros(num_layers, batch_size, hidden_dim).to(device),
              torch.zeros(num_layers, batch_size, hidden_dim).to(device))
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for data in test_data:
            inputs, labels = data
            # Apply data augmentation (the scipy-based helpers operate on NumPy arrays)
            inputs = data_augmentation(inputs.numpy())
            # Convert back to a tensor and move everything to the device
            inputs = torch.from_numpy(inputs).to(device)
            labels = labels.to(device)
            # Use recurrent network
            if recurrent_network:
                outputs = model(inputs, hidden)
            else:
                outputs = model(inputs)
            # Use pre-trained model (assumes the model defines forward_from_pretrained)
            if pre_trained_model:
                outputs = model.forward_from_pretrained(inputs)
            # Use fine-tuning (assumes the model defines fine_tune)
            if fine_tuning:
                outputs = model.fine_tune(inputs, hyperparameters)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = 0.001 * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
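A minimal smoke test of the helpers above, assuming they are run inside this module on a single 2-D NumPy image; the dummy array and throwaway optimizer below are illustrative, not part of the commit:
# Quick check of the augmentation pipeline and the LR schedule on dummy data
import numpy as np
import torch
from torch import optim
image = np.random.rand(28, 28)           # a single 2-D "image"
augmented = data_augmentation(image)     # rotation, scaling, translation, shear, flip
print(augmented.shape)                   # random scaling may change the spatial size slightly
opt = optim.Adam([torch.nn.Parameter(torch.zeros(1))], lr=0.001)
adjust_learning_rate(opt, 35)            # 35 // 30 == 1, so the LR drops to 1e-4
print(opt.param_groups[0]['lr'])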

83
main.py Normal file

@@ -0,0 +1,83 @@
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext import data
from gensim.corpora import WikiCorpus
from transformers import GPT2Tokenizer, GPT2Model
from functions import *
# Define the hyperparameters
num_layers = 2
batch_size = 32
hidden_dim = 256
# Load the GPT2 model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2Model.from_pretrained('gpt2')
# Load the data
wiki_corpus = WikiCorpus('enwiki-latest-pages-articles.xml.bz2')
# TabularDataset expects (name, Field) pairs, and the CSV committed here lives under data/
TEXT = data.Field()
stackoverflow_corpus = data.TabularDataset('data/stackoverflow.csv', format='csv', fields=[('text', TEXT)])
# Preprocess the data: WikiCorpus yields lists of token strings, TabularDataset yields Examples
wiki_data = [text for text in wiki_corpus]
stackoverflow_data = [example.text for example in stackoverflow_corpus]
# Convert the data to a format compatible with PyTorch: encode the token lists with the
# GPT-2 tokenizer and pad to a common length (torch.tensor cannot be built from lists of strings)
def encode_and_pad(texts, max_len=512):
    ids = [tokenizer.encode(' '.join(tokens))[:max_len] for tokens in texts]
    longest = max(len(seq) for seq in ids)
    return torch.tensor([seq + [tokenizer.eos_token_id] * (longest - len(seq)) for seq in ids])
wiki_data = encode_and_pad(wiki_data)
stackoverflow_data = encode_and_pad(stackoverflow_data)
# Define the Adam optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Define the loss function
criterion = nn.CrossEntropyLoss()
# Train the model
num_epochs = 10
labels = torch.tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 1])
# adjust_learning_rate comes from functions via the wildcard import above
for epoch in range(num_epochs):
    # Forward pass (note: GPT2Model's second positional argument is past_key_values, not a
    # second corpus, and its output is hidden states rather than class logits)
    outputs = model(wiki_data, stackoverflow_data)
    # Calculate the loss
    loss = criterion(outputs, labels)
    # Backward pass
    loss.backward()
    # Update the parameters
    optimizer.step()
    # Reset the gradients
    optimizer.zero_grad()
    # Evaluate the model (evaluate expects an iterable of (inputs, labels) batches)
    accuracy = evaluate(model, wiki_data)
    # Save the model weights and states
    torch.save(model.state_dict(), 'model.pth')
    # Adjust the learning rate
    adjust_learning_rate(optimizer, epoch)
# Define the model
class GPT(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)
        # Reuse the globally loaded GPT-2 model
        self.gpt2 = model
    def forward(self, x):
        # Embed the input
        x = self.embedding(x)
        # Pass through the GPT2 model (feed embeddings via inputs_embeds; this assumes
        # embedding_dim matches GPT-2's hidden size of 768)
        x = self.gpt2(inputs_embeds=x).last_hidden_state
        # Pass through the LSTM
        x, _ = self.lstm(x)
        # Pass through the fully connected layer
        x = self.fc(x)
        return x
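The wrapper class is defined but never instantiated in this script. A minimal sketch of how it might be constructed from the hyperparameters declared at the top of the file, assuming embedding_dim is set to GPT-2's hidden size; the variable names below are illustrative only:
# Hypothetical instantiation of the GPT wrapper defined above
vocab_size = tokenizer.vocab_size        # 50257 for the 'gpt2' tokenizer
embedding_dim = 768                      # must match GPT-2's hidden size for inputs_embeds
gpt = GPT(vocab_size, embedding_dim, hidden_dim, num_layers)
sample = torch.randint(0, vocab_size, (batch_size, 16))   # a dummy batch of token ids
logits = gpt(sample)                     # shape: (batch_size, 16, vocab_size)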

77
main2.py Normal file

@@ -0,0 +1,77 @@
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext import data
from gensim.corpora import WikiCorpus
from transformers import GPT2Tokenizer, GPT2Model
from functions import *
# Define the model
class GPT(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)
        # Reuses the global GPT-2 model loaded below, so instantiate GPT only after loading it
        self.gpt2 = model
    def forward(self, x):
        # Embed the input
        x = self.embedding(x)
        # Pass through the GPT2 model (feed embeddings via inputs_embeds; this assumes
        # embedding_dim matches GPT-2's hidden size of 768)
        x = self.gpt2(inputs_embeds=x).last_hidden_state
        # Pass through the LSTM
        x, _ = self.lstm(x)
        # Pass through the fully connected layer
        x = self.fc(x)
        return x
# Load the GPT2 model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2Model.from_pretrained('gpt2')
# Load the data
wiki_corpus_en = WikiCorpus('data/enwiki-latest-pages-articles.xml.bz2')
wiki_corpus_fr = WikiCorpus('data/frwiki-latest-pages-articles.xml.bz2')
# stackoverflow_corpus = data.TabularDataset('data/stackoverflow.csv', format='csv', fields=['text'])
# Preprocess the data (WikiCorpus yields lists of token strings)
wiki_data_en = [text for text in wiki_corpus_en]
wiki_data_fr = [text for text in wiki_corpus_fr]
# stackoverflow_data = [text for text in stackoverflow_corpus]
# Convert the data to a format compatible with PyTorch: encode the token lists with the
# GPT-2 tokenizer and pad to a common length (torch.tensor cannot be built from lists of strings)
def encode_and_pad(texts, max_len=512):
    ids = [tokenizer.encode(' '.join(tokens))[:max_len] for tokens in texts]
    longest = max(len(seq) for seq in ids)
    return torch.tensor([seq + [tokenizer.eos_token_id] * (longest - len(seq)) for seq in ids])
wiki_data_en = encode_and_pad(wiki_data_en)
wiki_data_fr = encode_and_pad(wiki_data_fr)
# stackoverflow_data = torch.tensor(stackoverflow_data)
# Define the Adam optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Define the loss function
criterion = nn.CrossEntropyLoss()
# Train the model
num_epochs = 10
labels = torch.tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 1])
for epoch in range(num_epochs):
    # Forward pass (note: GPT2Model's second positional argument is past_key_values, not a
    # second corpus, and its output is hidden states rather than class logits)
    # outputs = model(wiki_data, stackoverflow_data)
    outputs = model(wiki_data_en, wiki_data_fr)
    # Calculate the loss
    loss = criterion(outputs, labels)
    # Backward pass
    loss.backward()
    # Update the parameters
    optimizer.step()
    # Reset the gradients
    optimizer.zero_grad()
    # Evaluate the model (evaluate expects an iterable of (inputs, labels) batches)
    accuracy = evaluate(model, wiki_data_en)
    # Save the model weights and states
    torch.save(model.state_dict(), 'model.pth')
    # Adjust the learning rate
    adjust_learning_rate(optimizer, epoch)
    # Print the loss and accuracy
    print('Epoch: {}, Loss: {:.4f}, Accuracy: {:.4f}'.format(epoch+1, loss.item(), accuracy))
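The loop above pushes both corpora through the model in one shot and reuses ten fixed labels, so outputs and labels never align per batch. Below is a minimal batched sketch under the assumption that the intent is a two-class objective (as the 0/1 labels and CrossEntropyLoss suggest); GPT2ForSequenceClassification, the dummy data, and the variable names are illustrative choices, not the author's code:
import torch
from torch import optim
from torch.utils.data import DataLoader, TensorDataset
from transformers import GPT2ForSequenceClassification
from functions import adjust_learning_rate
# Dummy data: 200 sequences of 128 token ids, each with a binary label
inputs = torch.randint(0, 50257, (200, 128))
labels = torch.randint(0, 2, (200,))
loader = DataLoader(TensorDataset(inputs, labels), batch_size=32, shuffle=True)
clf = GPT2ForSequenceClassification.from_pretrained('gpt2', num_labels=2)
clf.config.pad_token_id = clf.config.eos_token_id  # GPT-2 defines no pad token by default
opt = optim.Adam(clf.parameters(), lr=0.001)
for epoch in range(3):
    for batch_inputs, batch_labels in loader:
        out = clf(input_ids=batch_inputs, labels=batch_labels)  # the loss is computed internally
        out.loss.backward()
        opt.step()
        opt.zero_grad()
    adjust_learning_rate(opt, epoch)
    print('Epoch: {}, Loss: {:.4f}'.format(epoch + 1, out.loss.item()))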