Compare revisions: Antoine.Marchal-Dombrat/dsl-nn
Commits on Source (2), showing 708 additions and 51 deletions
from groq import Groq
import sys

prompts = [
    'Here is the compiler from dsl to python code : \n Give me the full python code with all methods, variables, ... To create the program from this file : $file, be more generic csv do not have column name. Name of dataset is \'test_binary.csv\', if you need to load a pretrained model use \'model.pth\' Here is the first line of my csv : 5.1,3.5,1.4,0.2,Iris-setosa \n Give only the code without any comments or annotation. Give the code between ``` '
]

GROQ_API_KEY = "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"

def parse_response(content):
    # Send the prompt to the Groq API and extract the fenced code block from the reply.
    client = Groq(api_key=GROQ_API_KEY)
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": content,
            }
        ],
        model="llama3-70b-8192",
    )
    response = chat_completion.choices[0].message.content
    response = response.split('```')[1]
    return response

def compile(input_file, output_file):
    # Read the DSL program, inject it into the prompt, and write the generated Python code.
    with open(input_file, 'rb') as f:
        file = f.read().decode('utf-8')
    prompt = prompts[0].replace('$file', file)
    response = parse_response(prompt)
    with open(output_file, 'wb') as f:
        f.write(response.encode())

if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("Usage: python script.py output_file.py input_file.dsl [compile-all]")
        sys.exit(1)
    output_file = sys.argv[1]
    input_file = sys.argv[2]
    if len(sys.argv) == 4:
        for i in range(1, 11, 1):
            compile('example_' + str(i) + '/programme.maj', 'example_' + str(i) + '/compile_with_llm.py')
    else:
        compile(input_file, output_file)
\ No newline at end of file
@@ -223,3 +223,4 @@ def evaluate_model(model, dataset):
evaluate_model(model3, dataset3)
```
To compile all of the previous examples, use: `npm run start compile-all`
\ No newline at end of file
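The compiler script above can also be driven from Python for a single program; the sketch below assumes it is importable as a module named `llm_compiler` (name assumed) and that the `example_1` layout from its batch loop exists. Note that `compile()` takes the DSL input first and the output path second, the reverse of the order in the command-line usage string.

```python
# Hypothetical direct use of the LLM-backed compiler for a single DSL program.
# `llm_compiler` is an assumed module name for the Groq-based script above.
from llm_compiler import compile as llm_compile

# compile(input_file, output_file): DSL source in, generated Python out.
llm_compile("example_1/programme.maj", "example_1/compile_with_llm.py")
```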
import Levenshtein
import sys

def comparer_textes(reference, candidat):
    # Compare the two texts: raw edit distance and normalised similarity ratio.
    distance = Levenshtein.distance(reference, candidat)
    ratio = Levenshtein.ratio(reference, candidat)
    return distance, ratio

if __name__ == '__main__':
    if len(sys.argv) < 3:
        print("Usage: python script.py programme.py compile_with_llm.py")
        sys.exit(1)
    file1 = sys.argv[1]
    file2 = sys.argv[2]
    # Comment lines are dropped before comparison so only code is measured.
    with open(file1, 'r') as f:
        reference = ""
        for line in f.readlines():
            if '#' not in line:
                reference += line
    with open(file2, 'r') as f:
        candidat = ""
        for line in f.readlines():
            if '#' not in line:
                candidat += line
    diffChar, score = comparer_textes(reference, candidat)
    print(f"Number of characters that differ: {diffChar}")
    print(f"Similarity score: {score}")
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import csv

class IrisDataset(Dataset):
    def __init__(self, filename):
        self.data = []
        with open(filename, 'r') as f:
            reader = csv.reader(f)
            for row in reader:
                self.data.append([float(x) for x in row[:-1]] + [torch.tensor([0]) if row[-1] == 'Iris-setosa' else torch.tensor([1]) if row[-1] == 'Iris-versicolor' else torch.tensor([2])])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return torch.tensor(self.data[idx][:-1]), self.data[idx][-1]

    def shuffle(self):
        import random
        random.shuffle(self.data)

    def split(self, ratio):
        split_index = int(ratio * len(self.data))
        self.data, _ = self.data[:split_index], self.data[split_index:]

    def generateDataLoader(self, batchSize):
        return DataLoader(self, batch_size=batchSize, shuffle=True)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(4, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 3)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return torch.softmax(x, dim=1)

def accuracy(y_pred, y_true):
    _, predicted = torch.max(y_pred, 1)
    correct = (predicted == y_true).sum().item()
    return correct / len(y_true)

def recall(y_pred, y_true):
    _, predicted = torch.max(y_pred, 1)
    true_positives = (predicted == y_true).sum().item()
    actual_positives = (y_true == 1).sum().item()
    return true_positives / actual_positives

model1 = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model1.parameters(), lr=0.001)
dataset1 = IrisDataset("test_binary.csv")
dataset1.shuffle()
dataset1.split(0.8)
model1.add_module('metrics', accuracy)
model1.add_module('metrics', recall)
dataLoader = dataset1.generateDataLoader(32)

for epoch in range(30):
    for i, data in enumerate(dataLoader):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model1(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

model_eval = model1.eval()
loss_eval = criterion(model_eval(torch.tensor([float(x) for x in dataset1.data[0][:-1]])), torch.tensor([dataset1.data[0][-1]]))
torch.save(model1.state_dict(), 'model.pth')
@@ -3,6 +3,7 @@ newModel model1
configure {
model1.Adamoptimize(0.001)
model1.setLoss(crossEntropy)
model1.setHyperparameter(epochs=30, lr=0.01, batchSize=32)
model1.compile(linear(4, 64), applyReLU(0.5), linear(64, 32), applyReLU(0.5), linear(32, 3), applySoftmax(0.2))
Dataset dataset1 = loadData("test.csv")
dataset1.shuffle()
@@ -12,4 +13,5 @@ configure {
dataset1.generateDataLoader()
model1.train(dataset1)
model1.evaluate(dataset1)
model1.saveModel(model1)
}
\ No newline at end of file
@@ -19,28 +19,28 @@ class Model_model1(nn.Module):
def __init__(self):
super(Model_model1, self).__init__()
self.linear0 = nn.Linear(4 ,64)
self.leakyRelu1 = nn.LeakyReLU()
self.relu1 = nn.ReLU()
self.linear2 = nn.Linear(64 ,32)
self.leakyRelu3 = nn.LeakyReLU()
self.relu3 = nn.ReLU()
self.linear4 = nn.Linear(32 ,3)
self.leakyRelu5 = nn.LeakyReLU()
self.softmax5 = nn.Softmax()
def forward(self, x):
x = self.linear0(x)
x = self.leakyRelu1(x)
x = self.relu1(x)
x = self.linear2(x)
x = self.leakyRelu3(x)
x = self.relu3(x)
x = self.linear4(x)
x = self.leakyRelu5(x)
x = self.softmax5(x)
return x
########
# Main #
########
model_model1 = Model_model1()
learning_rate_model_model1 = 0.001 # Default learning_rate
batch_size_model_model1 = 16 # Default batch_size
epochs_model_model1 = 10 # Default epochs
learning_rate_model_model1 = 0.01
batch_size_model_model1 = 32
epochs_model_model1 = 30
criterion_model1 = nn.CrossEntropyLoss()
optimizer_model1 = optim.Adam(model_model1.parameters(), lr=learning_rate_model_model1)
@@ -74,6 +74,7 @@ train_loader_dataset1 = DataLoader(train_dataset_dataset1, batch_size=batch_size
val_loader_dataset1 = DataLoader(val_dataset_dataset1, batch_size=batch_size_model_model1)
def train_model1():
print(f'[*] Training metrics : ')
for epoch in range(epochs_model_model1):
model_model1.train()
for inputs, targets in train_loader_dataset1:
@@ -82,7 +83,7 @@ def train_model1():
loss = criterion_model1(outputs, targets)
loss.backward()
optimizer_model1.step()
print(f'Epoch {epoch + 1}/{epochs_model_model1}, Loss: {loss.item():.4f}')
print(f' └──[+] Epoch {epoch + 1}/{epochs_model_model1}, Loss: {loss.item():.4f}')
# Train
train_model1()
@@ -107,8 +108,8 @@ def evaluate_model1():
val_loss /= len(val_loader_dataset1)
all_targets = torch.cat(all_targets)
all_predicted = torch.cat(all_predicted)
print(f'[*] Metrics : ')
print(f' └──[+] Validation Loss: {val_loss:.4f}') # Default metric
print(f'[*] Evaluation metrics : ')
print(f' └──[+] Validation Loss: {val_loss:.4f}') # Default metric
accuracy_value = accuracy(all_targets, all_predicted)
print(f' └──[+] Accuracy: {accuracy_value:.4f}')
recall_value = recall(all_targets, all_predicted)
@@ -116,5 +117,6 @@ def evaluate_model1():
# Evaluate
evaluate_model1()
torch.save(model_model1.state_dict(), 'model_model1.pth')
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import precision_score, recall_score, f1_score

class CustomDataset(Dataset):
    def __init__(self, data, target):
        self.data = torch.tensor(data, dtype=torch.float)
        self.target = torch.tensor(target, dtype=torch.long)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.target[idx]

def loadData(file_path):
    data = pd.read_csv(file_path, header=None)
    return data

def splitData(data, ratio):
    split_idx = int(ratio * len(data))
    return data[:split_idx], data[split_idx:]

def generateDataLoader(data, batch_size=32, shuffle=True):
    dataset = CustomDataset(data.iloc[:, :-1].values, data.iloc[:, -1].values)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(4, 32)
        self.leaky_relu = nn.LeakyReLU(0.02)
        self.fc2 = nn.Linear(32, 3)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.leaky_relu(self.fc1(x))
        x = self.fc2(x)
        x = self.softmax(x)
        return x

def precision(y_pred, y_true, threshold=0.9):
    y_pred = (y_pred > threshold).int()
    return precision_score(y_true, y_pred)

def recall(y_pred, y_true, threshold=0.7):
    y_pred = (y_pred > threshold).int()
    return recall_score(y_true, y_pred)

def crossEntropy(y_pred, y_true):
    return nn.CrossEntropyLoss()(y_pred, y_true)

def main():
    data = loadData("test_binary.csv")
    train_data, val_data = splitData(data, 0.8)
    train_loader = generateDataLoader(train_data)
    val_loader = generateDataLoader(val_data)
    model = Net()
    criterion = crossEntropy
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    for epoch in range(10):  # number of epochs
        for x, y in train_loader:
            optimizer.zero_grad()
            outputs = model(x)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()
    print("Precision:", precision(model(val_data.iloc[:, :-1].values), val_data.iloc[:, -1].values))
    print("Recall:", recall(model(val_data.iloc[:, :-1].values), val_data.iloc[:, -1].values))
    print("F1 Score:", f1_score(model(val_data.iloc[:, :-1].values), val_data.iloc[:, -1].values))

if __name__ == "__main__":
    main()
@@ -2,7 +2,11 @@
newModel model10
configure {
Dataset data10 = loadData("test.csv")
model10.addMetrics(precision(0.9), f1Score(0.85))
model10.compile(linear(64, 32), applyLeakyReLU(0.02), linear(32, 10), applySoftmax(0.4))
data10.split(0.8)
data10.generateDataLoader()
model10.addMetrics(precision(0.9), recall(0.7))
model10.Adamoptimize(0.001)
model10.setLoss(crossEntropy)
model10.compile(linear(4, 32), applyLeakyReLU(0.02), linear(32, 3), applySoftmax(0.4))
model10.train(data10)
}
\ No newline at end of file
@@ -5,9 +5,12 @@
# Imports #
###########
import torch.nn as nn
import torch.optim as optim
import torch
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
#############
# New model #
@@ -15,16 +18,16 @@ from sklearn.preprocessing import LabelEncoder
class Model_model10(nn.Module):
def __init__(self):
super(Model_model10, self).__init__()
self.linear0 = nn.Linear(64 ,32)
self.linear0 = nn.Linear(4 ,32)
self.leakyRelu1 = nn.LeakyReLU()
self.linear2 = nn.Linear(32 ,10)
self.leakyRelu3 = nn.LeakyReLU()
self.linear2 = nn.Linear(32 ,3)
self.softmax3 = nn.Softmax()
def forward(self, x):
x = self.linear0(x)
x = self.leakyRelu1(x)
x = self.linear2(x)
x = self.leakyRelu3(x)
x = self.softmax3(x)
return x
########
@@ -34,22 +37,35 @@ model_model10 = Model_model10()
learning_rate_model_model10 = 0.001 # Default learning_rate
batch_size_model_model10 = 16 # Default batch_size
epochs_model_model10 = 10 # Default epochs
criterion_model10 = nn.CrossEntropyLoss()
optimizer_model10 = optim.Adam(model_model10.parameters(), lr=learning_rate_model_model10)
# Read dataset
data_data10 = pd.read_csv('test.csv')
X_data10 = torch.tensor(data_data10.iloc[:, :-1].values, dtype=torch.float32)
label_encoder_model10 = LabelEncoder()
y_encoded_model10 = label_encoder_model10.fit_transform(data_dataset1.iloc[:, -1].values)
y_encoded_model10 = label_encoder_model10.fit_transform(data_data10.iloc[:, -1].values)
y_data10 = torch.tensor(y_encoded_model10, dtype=torch.long)
def f1_score(y_true, y_pred):
p = precision(y_true, y_pred)
r = recall(y_true, y_pred)
if p + r == 0:
def precision(y_true, y_pred):
TP = (y_pred == 1) & (y_true == 1)
FP = (y_pred == 1) & (y_true == 0)
if (TP.sum().item() + FP.sum().item()) == 0:
return 0
return 2 * (p * r) / (p + r)
precision_score = TP.sum().item() / (TP.sum().item() + FP.sum().item())
return precision_score
# Split data
X_train_data10, X_val_data10, y_train_data10, y_val_data10 = train_test_split(X_data10, y_data10, test_size=0.8, shuffle=False)
# Create dataLoader
train_dataset_data10 = TensorDataset(X_train_data10, y_train_data10)
val_dataset_data10 = TensorDataset(X_val_data10, y_val_data10)
train_loader_data10 = DataLoader(train_dataset_data10, batch_size=batch_size_model_model10, shuffle=False)
val_loader_data10 = DataLoader(val_dataset_data10, batch_size=batch_size_model_model10)
def train_model10():
print(f'[*] Training metrics : ')
for epoch in range(epochs_model_model10):
model_model10.train()
for inputs, targets in train_loader_data10:
@@ -58,7 +74,7 @@ def train_model10():
loss = criterion_model10(outputs, targets)
loss.backward()
optimizer_model10.step()
print(f'Epoch {epoch + 1}/{epochs_model_model10}, Loss: {loss.item():.4f}')
print(f' └──[+] Epoch {epoch + 1}/{epochs_model_model10}, Loss: {loss.item():.4f}')
# Train
train_model10()
......
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np

class IrisDataset(Dataset):
    def __init__(self, file_path):
        self.data = pd.read_csv(file_path, header=None)
        self.X = self.data.iloc[:, :-1].values
        self.y = self.data.iloc[:, -1].values

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

    def split(self, ratio):
        idx = int(len(self) * ratio)
        return IrisDataset(self.data.iloc[:idx, :]), IrisDataset(self.data.iloc[idx:, :])

    def generateDataLoader(self, batch_size=32):
        return DataLoader(self, batch_size=batch_size, shuffle=True)

class PretrainedModel(nn.Module):
    def __init__(self):
        super(PretrainedModel, self).__init__()
        self.fc1 = nn.Linear(4, 128)
        self.fc2 = nn.Linear(128, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def setLoss(self, loss_fn):
        self.loss_fn = loss_fn

    def evaluate(self, dataset):
        self.eval()
        total_correct = 0
        with torch.no_grad():
            for x, y in dataset.generateDataLoader():
                x, y = x.float(), y.long()
                output = self.forward(x)
                _, predicted = torch.max(output, 1)
                total_correct += (predicted == y).sum().item()
        accuracy = total_correct / len(dataset)
        return accuracy

def loadModel(file_path):
    model = PretrainedModel()
    model.load_state_dict(torch.load(file_path))
    return model

def crossEntropy(output, target):
    return nn.CrossEntropyLoss()(output, target)

pretrainedModel = loadModel("model.pth")
testDataset = IrisDataset("test_binary.csv")
train_dataset, _ = testDataset.split(0.8)
testDataset.generateDataLoader()
pretrainedModel.setLoss(crossEntropy)
accuracy = pretrainedModel.evaluate(testDataset)
print("Model accuracy on test dataset:", accuracy)
// Program 2: Load a pretrained model and evaluate it on a test dataset
pretrainedModel = loadModel("pretrainedFile")
pretrainedModel = loadModel("model.pth")
configure {
Dataset testDataset = loadData("test.csv")
testDataset.split(0.8)
testDataset.generateDataLoader()
pretrainedModel.setLoss(crossEntropy)
pretrainedModel.evaluate(testDataset)
}
\ No newline at end of file
@@ -4,26 +4,39 @@
###########
# Imports #
###########
import torch.nn as nn
from torch import load as load_file
import torch
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
########
# Main #
########
model_pretrainedModel = load_file('pretrainedFile')
model_pretrainedModel = load_file('model.pth')
learning_rate_model_pretrainedModel = 0.001 # Default learning_rate
batch_size_model_pretrainedModel = 16 # Default batch_size
epochs_model_pretrainedModel = 10 # Default epochs
criterion_pretrainedModel = nn.CrossEntropyLoss()
# Read dataset
data_testDataset = pd.read_csv('test.csv')
X_testDataset = torch.tensor(data_testDataset.iloc[:, :-1].values, dtype=torch.float32)
label_encoder_pretrainedModel = LabelEncoder()
y_encoded_pretrainedModel = label_encoder_pretrainedModel.fit_transform(data_dataset1.iloc[:, -1].values)
y_encoded_pretrainedModel = label_encoder_pretrainedModel.fit_transform(data_testDataset.iloc[:, -1].values)
y_testDataset = torch.tensor(y_encoded_pretrainedModel, dtype=torch.long)
# Split data
X_train_testDataset, X_val_testDataset, y_train_testDataset, y_val_testDataset = train_test_split(X_testDataset, y_testDataset, test_size=0.8, shuffle=False)
# Create dataLoader
train_dataset_testDataset = TensorDataset(X_train_testDataset, y_train_testDataset)
val_dataset_testDataset = TensorDataset(X_val_testDataset, y_val_testDataset)
train_loader_testDataset = DataLoader(train_dataset_testDataset, batch_size=batch_size_model_pretrainedModel, shuffle=False)
val_loader_testDataset = DataLoader(val_dataset_testDataset, batch_size=batch_size_model_pretrainedModel)
def evaluate_pretrainedModel():
model_pretrainedModel.eval()
val_loss = 0.0
@@ -44,8 +57,8 @@ def evaluate_pretrainedModel():
val_loss /= len(val_loader_testDataset)
all_targets = torch.cat(all_targets)
all_predicted = torch.cat(all_predicted)
print(f'[*] Metrics : ')
print(f' └──[+] Validation Loss: {val_loss:.4f}') # Default metric
print(f'[*] Evaluation metrics : ')
print(f' └──[+] Validation Loss: {val_loss:.4f}') # Default metric
# Evaluate
evaluate_pretrainedModel()
......
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np

class CustomDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return {
            'data': torch.tensor(self.data[idx]),
            'label': torch.tensor(self.labels[idx])
        }

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(4, 128)
        self.fc2 = nn.Linear(128, 3)

    def forward(self, x):
        x = torch.sigmoid(self.fc1(x))
        x = self.fc2(x)
        return torch.softmax(x, dim=1)

def train(model, device, data_loader, optimizer, loss_fn):
    model.train()
    total_loss = 0
    for batch in data_loader:
        data, label = batch['data'].to(device), batch['label'].to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, label)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(data_loader)

def evaluate(model, device, data_loader, loss_fn):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in data_loader:
            data, label = batch['data'].to(device), batch['label'].to(device)
            output = model(data)
            loss = loss_fn(output, label)
            total_loss += loss.item()
    return total_loss / len(data_loader)

def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data = pd.read_csv('test_binary.csv', header=None)
    data_values = data.values
    data = data_values[:, :-1]
    labels = data_values[:, -1]
    label_dict = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
    labels = [label_dict[i] for i in labels]
    dataset = CustomDataset(data, labels)
    dataset.shuffle()
    train_size = int(0.6 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
    model = NeuralNetwork()
    optimizer = optim.SGD(model.parameters(), lr=0.01)
    loss_fn = nn.CrossEntropyLoss()
    for epoch in range(15):
        loss = train(model, device, train_loader, optimizer, loss_fn)
        print(f'Epoch {epoch+1}, Loss: {loss:.4f}')
    loss = evaluate(model, device, test_loader, loss_fn)
    print(f'Test Loss: {loss:.4f}')

if __name__ == '__main__':
    main()
@@ -3,9 +3,10 @@ newModel model3
configure {
Dataset dataset2 = loadData("test.csv")
dataset2.shuffle()
dataset2.split(0.6)
dataset2.generateDataLoader()
model3.setHyperparameter(epochs=15, lr=0.01, batchSize=32)
model3.compile(linear(256, 128), applySigmoid(0.6), linear(128, 10), applySoftmax(0.3))
model3.compile(linear(4, 128), applySigmoid(0.6), linear(128, 3), applySoftmax(0.3))
model3.SGDoptimize(0.001)
model3.setLoss(crossEntropy)
model3.train(dataset2)
......
@@ -9,6 +9,7 @@ import torch.optim as optim
import torch
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
#############
@@ -17,16 +18,16 @@ from torch.utils.data import DataLoader, TensorDataset
class Model_model3(nn.Module):
def __init__(self):
super(Model_model3, self).__init__()
self.linear0 = nn.Linear(256 ,128)
self.leakyRelu1 = nn.LeakyReLU()
self.linear2 = nn.Linear(128 ,10)
self.leakyRelu3 = nn.LeakyReLU()
self.linear0 = nn.Linear(4 ,128)
self.sigmoid1 = nn.Sigmoid()
self.linear2 = nn.Linear(128 ,3)
self.softmax3 = nn.Softmax()
def forward(self, x):
x = self.linear0(x)
x = self.leakyRelu1(x)
x = self.sigmoid1(x)
x = self.linear2(x)
x = self.leakyRelu3(x)
x = self.softmax3(x)
return x
########
@@ -43,9 +44,12 @@ optimizer_model3 = optim.Adam(model_model3.parameters(), lr=learning_rate_model_
data_dataset2 = pd.read_csv('test.csv')
X_dataset2 = torch.tensor(data_dataset2.iloc[:, :-1].values, dtype=torch.float32)
label_encoder_model3 = LabelEncoder()
y_encoded_model3 = label_encoder_model3.fit_transform(data_dataset1.iloc[:, -1].values)
y_encoded_model3 = label_encoder_model3.fit_transform(data_dataset2.iloc[:, -1].values)
y_dataset2 = torch.tensor(y_encoded_model3, dtype=torch.long)
# Split data
X_train_dataset2, X_val_dataset2, y_train_dataset2, y_val_dataset2 = train_test_split(X_dataset2, y_dataset2, test_size=0.6, shuffle=True)
# Create dataLoader
train_dataset_dataset2 = TensorDataset(X_train_dataset2, y_train_dataset2)
val_dataset_dataset2 = TensorDataset(X_val_dataset2, y_val_dataset2)
@@ -53,6 +57,7 @@ train_loader_dataset2 = DataLoader(train_dataset_dataset2, batch_size=batch_size
val_loader_dataset2 = DataLoader(val_dataset_dataset2, batch_size=batch_size_model_model3)
def train_model3():
print(f'[*] Training metrics : ')
for epoch in range(epochs_model_model3):
model_model3.train()
for inputs, targets in train_loader_dataset2:
@@ -61,7 +66,7 @@ def train_model3():
loss = criterion_model3(outputs, targets)
loss.backward()
optimizer_model3.step()
print(f'Epoch {epoch + 1}/{epochs_model_model3}, Loss: {loss.item():.4f}')
print(f' └──[+] Epoch {epoch + 1}/{epochs_model_model3}, Loss: {loss.item():.4f}')
# Train
train_model3()
@@ -86,8 +91,8 @@ def evaluate_model3():
val_loss /= len(val_loader_dataset2)
all_targets = torch.cat(all_targets)
all_predicted = torch.cat(all_predicted)
print(f'[*] Metrics : ')
print(f' └──[+] Validation Loss: {val_loss:.4f}') # Default metric
print(f'[*] Evaluation metrics : ')
print(f' └──[+] Validation Loss: {val_loss:.4f}') # Default metric
# Evaluate
evaluate_model3()
......
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.linear1 = nn.Linear(4, 64)
        self.linear2 = nn.Linear(64, 3)
        self.tanh = nn.Tanh()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.tanh(self.linear1(x))
        x = self.softmax(self.linear2(x))
        return x

class DataLoader:
    def __init__(self, dataset, batch_size):
        self.dataset = dataset
        self.batch_size = batch_size
        self.index = 0

    def __iter__(self):
        return self

    def __next__(self):
        if self.index < len(self.dataset):
            batch = self.dataset[self.index:self.index + self.batch_size]
            self.index += self.batch_size
            return torch.tensor(batch[:, :-1], dtype=torch.float), torch.tensor(batch[:, -1], dtype=torch.long)
        else:
            raise StopIteration

def load_data(file_path):
    dataset = pd.read_csv(file_path, header=None).values
    return dataset

def normalize(dataset):
    return (dataset[:, :-1] - dataset[:, :-1].min(axis=0)) / (dataset[:, :-1].max(axis=0) - dataset[:, :-1].min(axis=0))

def shuffle(dataset):
    np.random.shuffle(dataset)
    return dataset

def split_dataset(dataset, split_ratio):
    train_size = int(len(dataset) * split_ratio)
    return dataset[:train_size], dataset[train_size:]

def generate_data_loader(dataset, batch_size):
    return DataLoader(dataset, batch_size)

def cross_entropy_loss(output, target):
    return nn.CrossEntropyLoss()(output, target)

def train(model, data_loader, optimizer, criterion):
    model.train()
    total_loss = 0
    for batch in data_loader:
        inputs, labels = batch
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(data_loader)

def save_model(model, file_path):
    torch.save(model.state_dict(), file_path)

if __name__ == "__main__":
    model = Model()
    dataset = load_data("test_binary.csv")
    dataset = normalize(dataset)
    dataset = shuffle(dataset)
    train_dataset, test_dataset = split_dataset(dataset, 0.7)
    train_data_loader = generate_data_loader(train_dataset, 32)
    optimizer = optim.SGD(model.parameters(), lr=0.005)
    criterion = cross_entropy_loss
    model.train()
    for epoch in range(10):
        loss = train(model, train_data_loader, optimizer, criterion)
        print(f"Epoch {epoch+1}, Loss: {loss}")
    save_model(model, "model.pth")
@@ -4,9 +4,11 @@ configure {
Dataset dataset4 = loadData("test.csv")
dataset4.normalize()
dataset4.shuffle()
dataset4.split(0.7)
dataset4.generateDataLoader()
model4.SGDoptimize(0.005)
model4.compile(linear(128, 64), applyTanh(0.7), linear(64, 10), applySoftmax(0.4))
model4.setLoss(crossEntropy)
model4.compile(linear(4, 64), applyTanh(0.7), linear(64, 3), applySoftmax(0.4))
model4.train(dataset4)
model4.saveModel(model4_saved)
}
@@ -10,6 +10,7 @@ import torch
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
#############
@@ -18,16 +19,16 @@ from torch.utils.data import DataLoader, TensorDataset
class Model_model4(nn.Module):
def __init__(self):
super(Model_model4, self).__init__()
self.linear0 = nn.Linear(128 ,64)
self.leakyRelu1 = nn.LeakyReLU()
self.linear2 = nn.Linear(64 ,10)
self.leakyRelu3 = nn.LeakyReLU()
self.linear0 = nn.Linear(4 ,64)
self.tanh1 = nn.Tanh()
self.linear2 = nn.Linear(64 ,3)
self.softmax3 = nn.Softmax()
def forward(self, x):
x = self.linear0(x)
x = self.leakyRelu1(x)
x = self.tanh1(x)
x = self.linear2(x)
x = self.leakyRelu3(x)
x = self.softmax3(x)
return x
########
@@ -37,19 +38,23 @@ model_model4 = Model_model4()
learning_rate_model_model4 = 0.001 # Default learning_rate
batch_size_model_model4 = 16 # Default batch_size
epochs_model_model4 = 10 # Default epochs
criterion_model4 = nn.CrossEntropyLoss()
optimizer_model4 = optim.Adam(model_model4.parameters(), lr=learning_rate_model_model4)
# Read dataset
data_dataset4 = pd.read_csv('test.csv')
X_dataset4 = torch.tensor(data_dataset4.iloc[:, :-1].values, dtype=torch.float32)
label_encoder_model4 = LabelEncoder()
y_encoded_model4 = label_encoder_model4.fit_transform(data_dataset1.iloc[:, -1].values)
y_encoded_model4 = label_encoder_model4.fit_transform(data_dataset4.iloc[:, -1].values)
y_dataset4 = torch.tensor(y_encoded_model4, dtype=torch.long)
# Normalize data
scaler = StandardScaler()
X_dataset4 = scaler.fit_transform(X_dataset4)
# Split data
X_train_dataset4, X_val_dataset4, y_train_dataset4, y_val_dataset4 = train_test_split(X_dataset4, y_dataset4, test_size=0.7, shuffle=True)
# Create dataLoader
train_dataset_dataset4 = TensorDataset(X_train_dataset4, y_train_dataset4)
val_dataset_dataset4 = TensorDataset(X_val_dataset4, y_val_dataset4)
@@ -57,6 +62,7 @@ train_loader_dataset4 = DataLoader(train_dataset_dataset4, batch_size=batch_size
val_loader_dataset4 = DataLoader(val_dataset_dataset4, batch_size=batch_size_model_model4)
def train_model4():
print(f'[*] Training metrics : ')
for epoch in range(epochs_model_model4):
model_model4.train()
for inputs, targets in train_loader_dataset4:
@@ -65,7 +71,7 @@ def train_model4():
loss = criterion_model4(outputs, targets)
loss.backward()
optimizer_model4.step()
print(f'Epoch {epoch + 1}/{epochs_model_model4}, Loss: {loss.item():.4f}')
print(f' └──[+] Epoch {epoch + 1}/{epochs_model_model4}, Loss: {loss.item():.4f}')
# Train
train_model4()
......
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np

class CustomDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x = self.data.iloc[idx, :-1].values.astype(np.float32)
        y = self.data.iloc[idx, -1]
        if self.transform:
            x = self.transform(x)
        return x, y

def accuracy(y_pred, y_true):
    _, predicted = torch.max(y_pred, 1)
    correct = (predicted == y_true).sum().item()
    total = y_true.size(0)
    return correct / total

def recall(y_pred, y_true):
    _, predicted = torch.max(y_pred, 1)
    true_positives = (predicted * y_true).sum().item()
    false_negatives = ((predicted - 1) * (y_true - 1)).sum().item()
    return true_positives / (true_positives + false_negatives)

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(4, 256)
        self.relu = nn.LeakyReLU(0.01)
        self.fc2 = nn.Linear(256, 3)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        x = self.softmax(x)
        return x

def main():
    dataset = pd.read_csv('test_binary.csv', header=None)
    dataset = CustomDataset(dataset)
    train_size = int(0.75 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
    model = Model()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    model.add_module("accuracy", accuracy)
    model.add_module("recall", recall)
    model.train()
    for epoch in range(10):
        for batch in train_loader:
            x, y = batch
            optimizer.zero_grad()
            outputs = model(x)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()
    model.eval()
    total_correct = 0
    with torch.no_grad():
        for batch in test_loader:
            x, y = batch
            outputs = model(x)
            _, predicted = torch.max(outputs, 1)
            correct = (predicted == y).sum().item()
            total_correct += correct
    accuracy = total_correct / len(test_loader.dataset)
    print("Accuracy:", accuracy)
    recall_score = 0
    with torch.no_grad():
        for batch in test_loader:
            x, y = batch
            outputs = model(x)
            _, predicted = torch.max(outputs, 1)
            true_positives = (predicted * y).sum().item()
            false_negatives = ((predicted - 1) * (y - 1)).sum().item()
            recall_score += true_positives / (true_positives + false_negatives)
    recall_score /= len(test_loader)
    print("Recall:", recall_score)

if __name__ == "__main__":
    main()
@@ -2,7 +2,10 @@
newModel model5
configure {
Dataset dataset5 = loadData("test.csv")
dataset5.split(0.75)
dataset5.generateDataLoader()
model5.setLoss(crossEntropy)
model5.addMetrics(accuracy(0.9), recall(0.8))
model5.compile(linear(512, 256), applyLeakyReLU(0.01), linear(256, 10), applySoftmax(0.4))
model5.compile(linear(4, 256), applyLeakyReLU(0.01), linear(256, 3), applySoftmax(0.4))
model5.evaluate(dataset5)
}
\ No newline at end of file