Compare commits
No commits in common. "87931fb99fede72e44d26ba57974a2a290bfccdf" and "506dd64dd346b8bb0b553e85ba17cacbca726584" have entirely different histories.
87931fb99f ... 506dd64dd3
@@ -1,543 +0,0 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
"# Laden der Rohdaten"
|
|
||||||
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "\n",
"# Laden der 'kirp' Liste aus der Pickle-Datei\n",
|
|
||||||
"with open('rick.pickle', 'rb') as f:\n",
|
|
||||||
" data_frame = pickle.load(f)"
|
|
||||||
   ]
  },
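  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The classes below expect `data_frame` to provide a `genome_frequencies` column (one frequency vector per sample) and a `cancer_type` column (one label per sample). The real contents of `rick.pickle` are not shown in this notebook, so the following cell is only a hypothetical stand-in that illustrates the expected schema."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical stand-in for the pickled data, for illustration only;\n",
    "# the real data has many more samples and longer frequency vectors.\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "rng = np.random.default_rng(0)\n",
    "demo_frame = pd.DataFrame({\n",
    "    'genome_frequencies': [rng.random(2048).tolist() for _ in range(10)],\n",
    "    'cancer_type': ['kirp', 'other'] * 5,  # placeholder labels\n",
    "})\n",
    "demo_frame.head()"
   ]
  },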
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# PCA Class for Dimensionality Reduction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
"from torch.utils.data import Dataset\n",
|
|
||||||
"import torch\n",
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"from sklearn.preprocessing import LabelEncoder\n",
|
|
||||||
"from sklearn.decomposition import PCA\n",
|
|
||||||
"from sklearn.preprocessing import StandardScaler\n",
|
|
||||||
"from sklearn.model_selection import train_test_split\n",
|
|
||||||
"from typing import List, Tuple\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"class SplittedDataset(Dataset):\n",
|
|
||||||
" def __init__(self, dataframe):\n",
|
|
||||||
" self.dataframe = dataframe\n",
|
|
||||||
" self.genome_frequencies = torch.tensor(dataframe['pca_frequencies'].tolist(), dtype=torch.float32)\n",
|
|
||||||
" self.cancer_types = torch.tensor(dataframe['encoded_cancer_type'].tolist(), dtype=torch.long)\n",
|
|
||||||
"\n",
|
|
||||||
" def __getitem__(self, index):\n",
|
|
||||||
" return self.genome_frequencies[index], self.cancer_types[index]\n",
|
|
||||||
"\n",
|
|
||||||
" def __len__(self):\n",
|
|
||||||
" return len(self.dataframe)\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"class GenomeDataset(Dataset):\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" Eine benutzerdefinierte Dataset-Klasse, die für die Handhabung von Genomdaten konzipiert ist.\n",
|
|
||||||
" Diese Klasse wendet eine Principal Component Analysis (PCA) auf die Frequenzen der Genome an\n",
|
|
||||||
" und teilt den Datensatz in Trainings- und Validierungsteile auf.\n",
|
|
||||||
"\n",
|
|
||||||
" Attributes:\n",
|
|
||||||
" dataframe (pd.DataFrame): Ein Pandas DataFrame, der die initialen Daten enthält.\n",
|
|
||||||
" train_df (pd.DataFrame): Ein DataFrame, der den Trainingsdatensatz nach der Anwendung von PCA und der Aufteilung enthält.\n",
|
|
||||||
" val_df (pd.DataFrame): Ein DataFrame, der den Validierungsdatensatz nach der Anwendung von PCA und der Aufteilung enthält.\n",
|
|
||||||
"\n",
|
|
||||||
" Methods:\n",
|
|
||||||
" __init__(self, dataframe, n_pca_components=1034, train_size=0.8, split_random_state=42):\n",
|
|
||||||
" Konstruktor für die GenomeDataset Klasse.\n",
|
|
||||||
" _do_PCA(self, frequencies, n_components=1034):\n",
|
|
||||||
" Wendet PCA auf die gegebenen Frequenzen an.\n",
|
|
||||||
" _split_dataset(self, train_size=0.8, random_state=42):\n",
|
|
||||||
" Teilt den DataFrame in Trainings- und Validierungsdatensätze auf.\n",
|
|
||||||
" __getitem__(self, index):\n",
|
|
||||||
" Gibt ein Tupel aus transformierten Frequenzen und dem zugehörigen Krebstyp für einen gegebenen Index zurück.\n",
|
|
||||||
" __len__(self):\n",
|
|
||||||
" Gibt die Gesamtlänge der kombinierten Trainings- und Validierungsdatensätze zurück.\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
"\n",
|
|
||||||
" def __init__(self, dataframe: pd.DataFrame, n_pca_components: int = 1034, train_size: float = 0.8, split_random_state: int = 42):\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" Konstruktor für die GenomeDataset Klasse.\n",
|
|
||||||
"\n",
|
|
||||||
" Parameters:\n",
|
|
||||||
" dataframe (pd.DataFrame): Der DataFrame, der die Genome Frequenzen und Krebsarten enthält.\n",
|
|
||||||
" n_pca_components (int): Die Anzahl der PCA-Komponenten, auf die reduziert werden soll. Standardwert ist 1034.\n",
|
|
||||||
" train_size (float): Der Anteil der Daten, der als Trainingsdaten verwendet werden soll. Standardwert ist 0.8.\n",
|
|
||||||
" split_random_state (int): Der Zufalls-Saatwert, der für die Aufteilung des Datensatzes verwendet wird. Standardwert ist 42.\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" self.dataframe = dataframe\n",
|
|
||||||
"\n",
|
|
||||||
" # Umwandlung der Krebsarten in numerische Werte\n",
|
|
||||||
" self.label_encoder = LabelEncoder()\n",
|
|
||||||
" self.dataframe['encoded_cancer_type'] = self.label_encoder.fit_transform(dataframe['cancer_type'])\n",
|
|
||||||
"\n",
|
|
||||||
" # Anwenden der PCA auf die Frequenzen\n",
|
|
||||||
" self.dataframe['pca_frequencies'] = self._do_PCA(self.dataframe['genome_frequencies'].tolist(), n_pca_components)\n",
|
|
||||||
"\n",
|
|
||||||
" # Teilen des DataFrame in Trainings- und Validierungsdatensatz\n",
|
|
||||||
" self._split_dataset(train_size=train_size, random_state=split_random_state)\n",
|
|
||||||
"\n",
|
|
||||||
" def transform_datapoint(self, datapoint: List[float]) -> List[float]:\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" Transformiert einen einzelnen Datenpunkt durch Standardisierung und Anwendung der PCA.\n",
|
|
||||||
"\n",
|
|
||||||
" Diese Methode nimmt einen rohen Datenpunkt (eine Liste von Frequenzen), standardisiert ihn mit dem \n",
|
|
||||||
" zuvor angepassten Scaler und wendet dann die PCA-Transformation an, um ihn in den reduzierten \n",
|
|
||||||
" Feature-Raum zu überführen, der für das Training des Modells verwendet wurde.\n",
|
|
||||||
"\n",
|
|
||||||
" Parameters:\n",
|
|
||||||
" datapoint (List[float]): Ein roher Datenpunkt, bestehend aus einer Liste von Frequenzen.\n",
|
|
||||||
"\n",
|
|
||||||
" Returns:\n",
|
|
||||||
" List[float]: Der transformierte Datenpunkt, nach Anwendung der Standardisierung und der PCA.\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" # Standardisierung des Datenpunkts\n",
|
|
||||||
" scaled_data_point = self.scaler.transform([datapoint])\n",
|
|
||||||
"\n",
|
|
||||||
" # PCA-Transformation des standardisierten Datenpunkts\n",
|
|
||||||
" pca_transformed_point = self.pca.transform(scaled_data_point)\n",
|
|
||||||
"\n",
|
|
||||||
" return pca_transformed_point.tolist()\n",
|
|
||||||
"\n",
|
|
||||||
" def _do_PCA(self, frequencies: List[List[float]], n_components: int = 1034) -> List[List[float]]:\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" Wendet PCA auf die gegebenen Frequenzen an.\n",
|
|
||||||
"\n",
|
|
||||||
" Parameters:\n",
|
|
||||||
" frequencies (List[List[float]]): Die Liste der Frequenzen, auf die die PCA angewendet werden soll.\n",
|
|
||||||
" n_components (int): Die Anzahl der Komponenten für die PCA. Standardwert ist 1034.\n",
|
|
||||||
"\n",
|
|
||||||
" Returns:\n",
|
|
||||||
" List[List[float]]: Eine Liste von Listen, die die transformierten Frequenzen nach der PCA darstellt.\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
"\n",
|
|
||||||
" # Standardisieren der Frequenzen\n",
|
|
||||||
" self.scaler = StandardScaler()\n",
|
|
||||||
" scaled_frequencies = self.scaler.fit_transform(frequencies)\n",
|
|
||||||
"\n",
|
|
||||||
" # PCA-Instanz erstellen und auf die gewünschte Anzahl von Komponenten reduzieren\n",
|
|
||||||
" self.pca = PCA(n_components=n_components)\n",
|
|
||||||
"\n",
|
|
||||||
" # PCA auf die Frequenzen anwenden\n",
|
|
||||||
" pca_result = self.pca.fit_transform(scaled_frequencies)\n",
|
|
||||||
"\n",
|
|
||||||
" return pca_result.tolist()\n",
|
|
||||||
"\n",
|
|
||||||
" def _split_dataset(self, train_size: float = 0.8, random_state: int = 42):\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" Teilt den DataFrame in Trainings- und Validierungsdatensätze auf.\n",
|
|
||||||
"\n",
|
|
||||||
" Parameters:\n",
|
|
||||||
" train_size (float): Der Anteil der Daten, der als Trainingsdaten verwendet werden soll.\n",
|
|
||||||
" random_state (int): Der Zufalls-Saatwert, der für die Aufteilung des Datensatzes verwendet wird.\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
"\n",
|
|
||||||
" # Teilen des DataFrame in Trainings- und Validierungsdatensatz\n",
|
|
||||||
" train_df, val_df = train_test_split(self.dataframe, train_size=train_size, random_state=random_state)\n",
|
|
||||||
" self.train_df = SplittedDataset(train_df)\n",
|
|
||||||
" self.val_df = SplittedDataset(val_df)\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
" def __getitem__(self, index: int) -> Tuple[torch.Tensor, int]:\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" Gibt ein Tupel aus transformierten Frequenzen und dem entsprechenden Krebstyp für einen gegebenen Index zurück.\n",
|
|
||||||
"\n",
|
|
||||||
" Parameters:\n",
|
|
||||||
" index (int): Der Index des zu abrufenden Datenelements.\n",
|
|
||||||
"\n",
|
|
||||||
" Returns:\n",
|
|
||||||
" Tuple[torch.Tensor, int]: Ein Tupel, bestehend aus einem Tensor der transformierten Frequenzen und dem zugehörigen Krebstyp.\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
"\n",
|
|
||||||
" print(self.train_df.shape)\n",
|
|
||||||
" print(self.val_df.shape)\n",
|
|
||||||
" \n",
|
|
||||||
" if index < len(self.train_df):\n",
|
|
||||||
" row = self.train_df.iloc[index]\n",
|
|
||||||
" else:\n",
|
|
||||||
" row = self.val_df.iloc[len(self.train_df) - index]\n",
|
|
||||||
"\n",
|
|
||||||
" pca_frequencies_tensor = torch.tensor(row['pca_frequencies'], dtype=torch.float32)\n",
|
|
||||||
" cancer_type = row['encoded_cancer_type']\n",
|
|
||||||
"\n",
|
|
||||||
" return pca_frequencies_tensor, cancer_type\n",
|
|
||||||
"\n",
|
|
||||||
" def __len__(self) -> int:\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" Gibt die Gesamtlänge der kombinierten Trainings- und Validierungsdatensätze zurück.\n",
|
|
||||||
"\n",
|
|
||||||
" Returns:\n",
|
|
||||||
" int: Die Länge der kombinierten Datensätze.\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" \n",
|
|
||||||
" return len(self.train_df) + len(self.val_df)\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
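  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A hedged usage sketch (not executed here): build the dataset, inspect the split sizes, and project a new sample with the fitted scaler and PCA. `data_frame` comes from the loading cell above; the component count of 64 mirrors the default of the NEAT classifier below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sketch only: reduce each frequency vector to 64 PCA components\n",
    "genome_dataset = GenomeDataset(data_frame, n_pca_components=64)\n",
    "print(len(genome_dataset.train_df), 'training samples')\n",
    "print(len(genome_dataset.val_df), 'validation samples')\n",
    "\n",
    "# Project one raw frequency vector into the fitted PCA space\n",
    "raw_point = data_frame['genome_frequencies'].iloc[0]\n",
    "reduced_point = genome_dataset.transform_datapoint(raw_point)\n",
    "print(len(reduced_point))  # -> 64"
   ]
  },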
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Definition of the Neural Network"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
"from torch.utils.data import DataLoader\n",
|
|
||||||
"from multiprocessing import cpu_count\n",
|
|
||||||
"import os\n",
|
|
||||||
"import neat\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"\n",
|
|
||||||
"class CancerClassifierNEAT():\n",
|
|
||||||
" def __init__(self, dataframe: pd.DataFrame, n_pca_components: int = 64) -> None:\n",
|
|
||||||
" self.num_generations = None\n",
|
|
||||||
" self.n_pca_components = n_pca_components\n",
|
|
||||||
" # Initialisierung der config Datei\n",
|
|
||||||
" local_dir = os.getcwd()\n",
|
|
||||||
" config_path = os.path.join(local_dir, 'config')\n",
|
|
||||||
" config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction, neat.DefaultSpeciesSet, neat.DefaultStagnation, config_path)\n",
|
|
||||||
" # Initialisierung der Population\n",
|
|
||||||
" self.pop = neat.Population(config)\n",
|
|
||||||
" stats = neat.StatisticsReporter()\n",
|
|
||||||
" self.pop.add_reporter(stats)\n",
|
|
||||||
" self.pop.add_reporter(neat.StdOutReporter(True))\n",
|
|
||||||
" # Erstellen des Datensatzes\n",
|
|
||||||
" genome_dataset = GenomeDataset(data_frame, n_pca_components=n_pca_components)\n",
|
|
||||||
" self.train_dataset = genome_dataset.train_df\n",
|
|
||||||
" self.valid_dataset = genome_dataset.val_df\n",
|
|
||||||
" # Erstellen der Datensatzloader\n",
|
|
||||||
" self.train_loader = DataLoader(dataset=self.train_dataset, batch_size=1, shuffle=True)\n",
|
|
||||||
" self.valid_loader = DataLoader(dataset=self.valid_dataset, batch_size=1, shuffle=False)\n",
|
|
||||||
"\n",
|
|
||||||
" self.training_accuracies = []\n",
|
|
||||||
" self.validation_accuracies = []\n",
|
|
||||||
"\n",
|
|
||||||
" def fitness(self, genome: neat.DefaultGenome, config) -> float:\n",
|
|
||||||
" net = neat.nn.FeedForwardNetwork.create(genome, config)\n",
|
|
||||||
"\n",
|
|
||||||
" correct_predictions = 0\n",
|
|
||||||
" total_predictions = 0\n",
|
|
||||||
"\n",
|
|
||||||
" for i, (inputs, labels) in enumerate(self.train_loader):\n",
|
|
||||||
" inputs_list = inputs.view(-1).tolist()\n",
|
|
||||||
" #print(inputs_list)\n",
|
|
||||||
" # Netz aktivieren\n",
|
|
||||||
" outputs = net.activate(inputs_list)\n",
|
|
||||||
" #print(outputs)\n",
|
|
||||||
" # Berechnen der Genauigkeit\n",
|
|
||||||
" predicted = np.argmax(np.array(outputs))\n",
|
|
||||||
" correct_predictions += (predicted == labels).sum().item()\n",
|
|
||||||
" total_predictions += labels.size(0)\n",
|
|
||||||
"\n",
|
|
||||||
" train_accuracy = correct_predictions / total_predictions\n",
|
|
||||||
"\n",
|
|
||||||
" return train_accuracy\n",
|
|
||||||
"\n",
|
|
||||||
" def eval_genomes(self, genomes, config):\n",
|
|
||||||
" for _, genome in genomes:\n",
|
|
||||||
" genome.fitness = self.fitness(genome, config)\n",
|
|
||||||
"\n",
|
|
||||||
" def run(self, num_generations: int = 500) -> neat.DefaultGenome:\n",
|
|
||||||
" #parallel = neat.ParallelEvaluator(cpu_count(), self.fitness)\n",
|
|
||||||
" #winner = self.pop.run(parallel.evaluate, num_generations)\n",
|
|
||||||
" winner = self.pop.run(self.eval_genomes, num_generations)\n",
|
|
||||||
"\n",
|
|
||||||
" return winner"
|
|
||||||
   ]
  },
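  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The constructor above reads its hyperparameters from a plain-text neat-python file named `config` in the working directory. That file is not part of this notebook, so the fragment below is only a hedged sketch of what it plausibly contains: `pop_size = 500` matches the run output further down, `num_inputs = 64` matches `n_pca_components`, and `num_outputs` (the number of cancer types) is a placeholder. neat-python also requires the full set of `[DefaultGenome]`, `[DefaultSpeciesSet]`, `[DefaultStagnation]`, and `[DefaultReproduction]` keys, which are omitted here."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical fragment of the 'config' file (display only, not written to disk)\n",
    "config_fragment = \"\"\"\n",
    "[NEAT]\n",
    "fitness_criterion   = max\n",
    "fitness_threshold   = 0.95\n",
    "pop_size            = 500\n",
    "reset_on_extinction = False\n",
    "\n",
    "[DefaultGenome]\n",
    "num_inputs  = 64\n",
    "num_hidden  = 0\n",
    "num_outputs = 5\n",
    "\"\"\"\n",
    "print(config_fragment)"
   ]
  },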
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
"\n",
|
|
||||||
" ****** Running generation 0 ****** \n",
|
|
||||||
"\n",
|
|
||||||
"Population's average fitness: 0.31162 stdev: 0.16467\n",
|
|
||||||
"Best fitness: 0.68803 - size: (13, 431) - species 47 - id 47\n",
|
|
||||||
"Average adjusted fitness: 0.216\n",
|
|
||||||
"Mean genetic distance 3.635, standard deviation 0.325\n",
|
|
||||||
"Population of 500 members in 100 species:\n",
|
|
||||||
" ID age size fitness adj fit stag\n",
|
|
||||||
" ==== === ==== ======= ======= ====\n",
|
|
||||||
" 1 0 5 0.3 0.213 0\n",
|
|
||||||
" 2 0 5 0.4 0.294 0\n",
|
|
||||||
" 3 0 5 0.3 0.189 0\n",
|
|
||||||
" 4 0 5 0.1 0.005 0\n",
|
|
||||||
" 5 0 5 0.4 0.299 0\n",
|
|
||||||
" 6 0 5 0.3 0.210 0\n",
|
|
||||||
" 7 0 5 0.4 0.326 0\n",
|
|
||||||
" 8 0 5 0.2 0.125 0\n",
|
|
||||||
" 9 0 5 0.6 0.463 0\n",
|
|
||||||
" 10 0 5 0.2 0.149 0\n",
|
|
||||||
" 11 0 5 0.2 0.134 0\n",
|
|
||||||
" 12 0 5 0.5 0.447 0\n",
|
|
||||||
" 13 0 5 0.6 0.497 0\n",
|
|
||||||
" 14 0 5 0.6 0.489 0\n",
|
|
||||||
" 15 0 5 0.3 0.213 0\n",
|
|
||||||
" 16 0 5 0.1 0.018 0\n",
|
|
||||||
" 17 0 5 0.3 0.173 0\n",
|
|
||||||
" 18 0 5 0.3 0.197 0\n",
|
|
||||||
" 19 0 5 0.2 0.110 0\n",
|
|
||||||
" 20 0 5 0.2 0.109 0\n",
|
|
||||||
" 21 0 5 0.1 0.034 0\n",
|
|
||||||
" 22 0 5 0.4 0.324 0\n",
|
|
||||||
" 23 0 5 0.2 0.109 0\n",
|
|
||||||
" 24 0 5 0.2 0.060 0\n",
|
|
||||||
" 25 0 5 0.2 0.149 0\n",
|
|
||||||
" 26 0 5 0.3 0.213 0\n",
|
|
||||||
" 27 0 5 0.3 0.244 0\n",
|
|
||||||
" 28 0 5 0.1 0.018 0\n",
|
|
||||||
" 29 0 5 0.1 0.007 0\n",
|
|
||||||
" 30 0 5 0.3 0.215 0\n",
|
|
||||||
" 31 0 5 0.1 0.006 0\n",
|
|
||||||
" 32 0 5 0.6 0.475 0\n",
|
|
||||||
" 33 0 5 0.4 0.276 0\n",
|
|
||||||
" 34 0 5 0.2 0.115 0\n",
|
|
||||||
" 35 0 5 0.1 0.050 0\n",
|
|
||||||
" 36 0 5 0.6 0.481 0\n",
|
|
||||||
" 37 0 5 0.4 0.341 0\n",
|
|
||||||
" 38 0 5 0.3 0.247 0\n",
|
|
||||||
" 39 0 5 0.5 0.430 0\n",
|
|
||||||
" 40 0 5 0.1 0.021 0\n",
|
|
||||||
" 41 0 5 0.1 0.044 0\n",
|
|
||||||
" 42 0 5 0.2 0.093 0\n",
|
|
||||||
" 43 0 5 0.2 0.088 0\n",
|
|
||||||
" 44 0 5 0.3 0.189 0\n",
|
|
||||||
" 45 0 5 0.1 0.033 0\n",
|
|
||||||
" 46 0 5 0.2 0.074 0\n",
|
|
||||||
" 47 0 5 0.7 0.593 0\n",
|
|
||||||
" 48 0 5 0.4 0.319 0\n",
|
|
||||||
" 49 0 5 0.3 0.201 0\n",
|
|
||||||
" 50 0 5 0.5 0.385 0\n",
|
|
||||||
" 51 0 5 0.1 0.051 0\n",
|
|
||||||
" 52 0 5 0.6 0.496 0\n",
|
|
||||||
" 53 0 5 0.6 0.463 0\n",
|
|
||||||
" 54 0 5 0.1 0.031 0\n",
|
|
||||||
" 55 0 5 0.2 0.122 0\n",
|
|
||||||
" 56 0 5 0.1 0.036 0\n",
|
|
||||||
" 57 0 5 0.1 0.004 0\n",
|
|
||||||
" 58 0 5 0.3 0.197 0\n",
|
|
||||||
" 59 0 5 0.1 0.027 0\n",
|
|
||||||
" 60 0 5 0.1 0.022 0\n",
|
|
||||||
" 61 0 5 0.1 0.000 0\n",
|
|
||||||
" 62 0 5 0.3 0.192 0\n",
|
|
||||||
" 63 0 5 0.7 0.572 0\n",
|
|
||||||
" 64 0 5 0.4 0.274 0\n",
|
|
||||||
" 65 0 5 0.2 0.138 0\n",
|
|
||||||
" 66 0 5 0.1 0.008 0\n",
|
|
||||||
" 67 0 5 0.3 0.179 0\n",
|
|
||||||
" 68 0 5 0.2 0.097 0\n",
|
|
||||||
" 69 0 5 0.3 0.252 0\n",
|
|
||||||
" 70 0 5 0.3 0.214 0\n",
|
|
||||||
" 71 0 5 0.6 0.499 0\n",
|
|
||||||
" 72 0 5 0.2 0.140 0\n",
|
|
||||||
" 73 0 5 0.1 0.000 0\n",
|
|
||||||
" 74 0 5 0.3 0.219 0\n",
|
|
||||||
" 75 0 5 0.3 0.204 0\n",
|
|
||||||
" 76 0 5 0.5 0.405 0\n",
|
|
||||||
" 77 0 5 0.4 0.300 0\n",
|
|
||||||
" 78 0 5 0.3 0.213 0\n",
|
|
||||||
" 79 0 5 0.6 0.492 0\n",
|
|
||||||
" 80 0 5 0.1 0.017 0\n",
|
|
||||||
" 81 0 5 0.6 0.501 0\n",
|
|
||||||
" 82 0 5 0.3 0.213 0\n",
|
|
||||||
" 83 0 5 0.5 0.450 0\n",
|
|
||||||
" 84 0 5 0.6 0.493 0\n",
|
|
||||||
" 85 0 5 0.2 0.111 0\n",
|
|
||||||
" 86 0 5 0.5 0.421 0\n",
|
|
||||||
" 87 0 5 0.5 0.397 0\n",
|
|
||||||
" 88 0 5 0.6 0.502 0\n",
|
|
||||||
" 89 0 5 0.3 0.156 0\n",
|
|
||||||
" 90 0 5 0.4 0.342 0\n",
|
|
||||||
" 91 0 5 0.1 0.000 0\n",
|
|
||||||
" 92 0 5 0.3 0.209 0\n",
|
|
||||||
" 93 0 5 0.1 0.017 0\n",
|
|
||||||
" 94 0 5 0.3 0.160 0\n",
|
|
||||||
" 95 0 5 0.2 0.097 0\n",
|
|
||||||
" 96 0 5 0.5 0.380 0\n",
|
|
||||||
" 97 0 5 0.2 0.081 0\n",
|
|
||||||
" 98 0 5 0.5 0.416 0\n",
|
|
||||||
" 99 0 5 0.2 0.087 0\n",
|
|
||||||
" 100 0 5 0.3 0.222 0\n",
|
|
||||||
"Total extinctions: 0\n",
|
|
||||||
"Generation time: 56.773 sec\n",
|
|
||||||
"\n",
|
|
||||||
" ****** Running generation 1 ****** \n",
|
|
||||||
"\n"
|
|
||||||
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[5], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m ea \u001b[39m=\u001b[39m CancerClassifierNEAT(data_frame)\n\u001b[0;32m----> 2\u001b[0m ea\u001b[39m.\u001b[39;49mrun()\n",
      "Cell \u001b[0;32mIn[3], line 59\u001b[0m, in \u001b[0;36mCancerClassifierNEAT.run\u001b[0;34m(self, num_generations)\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mrun\u001b[39m(\u001b[39mself\u001b[39m, num_generations: \u001b[39mint\u001b[39m \u001b[39m=\u001b[39m \u001b[39m500\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m neat\u001b[39m.\u001b[39mDefaultGenome:\n\u001b[1;32m 57\u001b[0m \u001b[39m#parallel = neat.ParallelEvaluator(cpu_count(), self.fitness)\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \u001b[39m#winner = self.pop.run(parallel.evaluate, num_generations)\u001b[39;00m\n\u001b[0;32m---> 59\u001b[0m winner \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mpop\u001b[39m.\u001b[39;49mrun(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49meval_genomes, num_generations)\n\u001b[1;32m 61\u001b[0m \u001b[39mreturn\u001b[39;00m winner\n",
      "File \u001b[0;32m~/.local/lib/python3.8/site-packages/neat/population.py:89\u001b[0m, in \u001b[0;36mPopulation.run\u001b[0;34m(self, fitness_function, n)\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mreporters\u001b[39m.\u001b[39mstart_generation(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mgeneration)\n\u001b[1;32m 88\u001b[0m \u001b[39m# Evaluate all genomes using the user-provided function.\u001b[39;00m\n\u001b[0;32m---> 89\u001b[0m fitness_function(\u001b[39mlist\u001b[39;49m(iteritems(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mpopulation)), \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mconfig)\n\u001b[1;32m 91\u001b[0m \u001b[39m# Gather and report statistics.\u001b[39;00m\n\u001b[1;32m 92\u001b[0m best \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n",
      "Cell \u001b[0;32mIn[3], line 54\u001b[0m, in \u001b[0;36mCancerClassifierNEAT.eval_genomes\u001b[0;34m(self, genomes, config)\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39meval_genomes\u001b[39m(\u001b[39mself\u001b[39m, genomes, config):\n\u001b[1;32m 53\u001b[0m \u001b[39mfor\u001b[39;00m _, genome \u001b[39min\u001b[39;00m genomes:\n\u001b[0;32m---> 54\u001b[0m genome\u001b[39m.\u001b[39mfitness \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mfitness(genome, config)\n",
      "Cell \u001b[0;32mIn[3], line 41\u001b[0m, in \u001b[0;36mCancerClassifierNEAT.fitness\u001b[0;34m(self, genome, config)\u001b[0m\n\u001b[1;32m 38\u001b[0m inputs_list \u001b[39m=\u001b[39m inputs\u001b[39m.\u001b[39mview(\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m)\u001b[39m.\u001b[39mtolist()\n\u001b[1;32m 39\u001b[0m \u001b[39m#print(inputs_list)\u001b[39;00m\n\u001b[1;32m 40\u001b[0m \u001b[39m# Netz aktivieren\u001b[39;00m\n\u001b[0;32m---> 41\u001b[0m outputs \u001b[39m=\u001b[39m net\u001b[39m.\u001b[39;49mactivate(inputs_list)\n\u001b[1;32m 42\u001b[0m \u001b[39m#print(outputs)\u001b[39;00m\n\u001b[1;32m 43\u001b[0m \u001b[39m# Berechnen der Genauigkeit\u001b[39;00m\n\u001b[1;32m 44\u001b[0m predicted \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39margmax(np\u001b[39m.\u001b[39marray(outputs))\n",
      "File \u001b[0;32m~/.local/lib/python3.8/site-packages/neat/nn/feed_forward.py:22\u001b[0m, in \u001b[0;36mFeedForwardNetwork.activate\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 20\u001b[0m node_inputs \u001b[39m=\u001b[39m []\n\u001b[1;32m 21\u001b[0m \u001b[39mfor\u001b[39;00m i, w \u001b[39min\u001b[39;00m links:\n\u001b[0;32m---> 22\u001b[0m node_inputs\u001b[39m.\u001b[39;49mappend(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mvalues[i] \u001b[39m*\u001b[39;49m w)\n\u001b[1;32m 23\u001b[0m s \u001b[39m=\u001b[39m agg_func(node_inputs)\n\u001b[1;32m 24\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mvalues[node] \u001b[39m=\u001b[39m act_func(bias \u001b[39m+\u001b[39m response \u001b[39m*\u001b[39m s)\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
"source": [
|
|
||||||
"ea = CancerClassifierNEAT(data_frame)\n",
|
|
||||||
"ea.run()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
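  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The run above was interrupted by hand during generation 1. As a hedged sketch (assuming the run is left to finish and its result is kept, e.g. `winner = ea.run()`), the winning genome can be turned into a network and scored on the validation loader with the same calls the fitness function uses:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sketch only: evaluate a finished winner genome on the validation split.\n",
    "# Assumes `winner = ea.run()` completed and `ea` still holds its config and loaders.\n",
    "winner_net = neat.nn.FeedForwardNetwork.create(winner, ea.pop.config)\n",
    "\n",
    "correct = 0\n",
    "total = 0\n",
    "for inputs, labels in ea.valid_loader:\n",
    "    outputs = winner_net.activate(inputs.view(-1).tolist())\n",
    "    correct += int(np.argmax(np.array(outputs)) == labels.item())\n",
    "    total += labels.size(0)\n",
    "print(f'Validation accuracy: {correct / total:.3f}')"
   ]
  },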
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
"from IPython.display import clear_output\n",
|
|
||||||
"import matplotlib.pyplot as plt\n",
|
|
||||||
"\n",
|
|
||||||
"# Listen, um Verluste zu speichern\n",
|
|
||||||
"train_losses = []\n",
|
|
||||||
"valid_losses = []\n",
|
|
||||||
"train_accuracies = []\n",
|
|
||||||
"valid_accuracies = []\n",
|
|
||||||
"\n",
|
|
||||||
"for epoch in range(num_epochs):\n",
|
|
||||||
" model.train()\n",
|
|
||||||
" train_loss = 0.0\n",
|
|
||||||
" correct_predictions = 0\n",
|
|
||||||
" total_predictions = 0\n",
|
|
||||||
"\n",
|
|
||||||
" for i, (inputs, labels) in enumerate(train_loader):\n",
|
|
||||||
" inputs, labels = inputs.to(device), labels.to(device)\n",
|
|
||||||
" optimizer.zero_grad()\n",
|
|
||||||
" outputs = model(inputs)\n",
|
|
||||||
" loss = criterion(outputs, labels)\n",
|
|
||||||
" loss.backward()\n",
|
|
||||||
" optimizer.step()\n",
|
|
||||||
" train_loss += loss.item()\n",
|
|
||||||
"\n",
|
|
||||||
" # Berechnen der Genauigkeit\n",
|
|
||||||
" _, predicted = torch.max(outputs, 1)\n",
|
|
||||||
" correct_predictions += (predicted == labels).sum().item()\n",
|
|
||||||
" total_predictions += labels.size(0)\n",
|
|
||||||
"\n",
|
|
||||||
" # Durchschnittlicher Trainingsverlust und Genauigkeit\n",
|
|
||||||
" train_loss /= len(train_loader)\n",
|
|
||||||
" train_accuracy = correct_predictions / total_predictions\n",
|
|
||||||
" train_losses.append(train_loss)\n",
|
|
||||||
" train_accuracies.append(train_accuracy)\n",
|
|
||||||
"\n",
|
|
||||||
" # Validierungsverlust und Genauigkeit\n",
|
|
||||||
" model.eval()\n",
|
|
||||||
" valid_loss = 0.0\n",
|
|
||||||
" correct_predictions = 0\n",
|
|
||||||
" total_predictions = 0\n",
|
|
||||||
"\n",
|
|
||||||
" with torch.no_grad():\n",
|
|
||||||
" for inputs, labels in valid_loader:\n",
|
|
||||||
" inputs, labels = inputs.to(device), labels.to(device)\n",
|
|
||||||
" outputs = model(inputs)\n",
|
|
||||||
" loss = criterion(outputs, labels)\n",
|
|
||||||
" valid_loss += loss.item()\n",
|
|
||||||
"\n",
|
|
||||||
" # Berechnen der Genauigkeit\n",
|
|
||||||
" _, predicted = torch.max(outputs, 1)\n",
|
|
||||||
" correct_predictions += (predicted == labels).sum().item()\n",
|
|
||||||
" total_predictions += labels.size(0)\n",
|
|
||||||
"\n",
|
|
||||||
" # Durchschnittlicher Validierungsverlust und Genauigkeit\n",
|
|
||||||
" valid_loss /= len(valid_loader)\n",
|
|
||||||
" valid_accuracy = correct_predictions / total_predictions\n",
|
|
||||||
" valid_losses.append(valid_loss)\n",
|
|
||||||
" valid_accuracies.append(valid_accuracy)\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
" # Aktualisieren des Graphen\n",
|
|
||||||
" clear_output(wait=True)\n",
|
|
||||||
" fig, ax1 = plt.subplots()\n",
|
|
||||||
"\n",
|
|
||||||
" # Zeichnen der Verlustkurven\n",
|
|
||||||
" ax1.plot(train_losses, label='Trainingsverlust', color='r')\n",
|
|
||||||
" ax1.plot(valid_losses, label='Validierungsverlust', color='b')\n",
|
|
||||||
" ax1.set_xlabel('Epochen')\n",
|
|
||||||
" ax1.set_ylabel('Verlust', color='g')\n",
|
|
||||||
" ax1.tick_params(axis='y', labelcolor='g')\n",
|
|
||||||
"\n",
|
|
||||||
" # Zweite y-Achse für die Genauigkeit\n",
|
|
||||||
" ax2 = ax1.twinx()\n",
|
|
||||||
" ax2.plot(train_accuracies, label='Trainingsgenauigkeit', color='r', linestyle='dashed')\n",
|
|
||||||
" ax2.plot(valid_accuracies, label='Validierungsgenauigkeit', color='b', linestyle='dashed')\n",
|
|
||||||
" ax2.set_ylabel('Genauigkeit', color='g')\n",
|
|
||||||
" ax2.tick_params(axis='y', labelcolor='g')\n",
|
|
||||||
"\n",
|
|
||||||
" # Titel und Legende\n",
|
|
||||||
" plt.title('Trainings- und Validierungsverlust und -genauigkeit über die Zeit')\n",
|
|
||||||
" fig.tight_layout()\n",
|
|
||||||
" ax1.legend(loc='lower left')\n",
|
|
||||||
" ax2.legend(loc='lower right')\n",
|
|
||||||
"\n",
|
|
||||||
" plt.show()\n",
|
|
||||||
"\n",
|
|
||||||
" print(f'Epoch [{epoch+1}/{num_epochs}], Trainingsverlust: {train_loss:.4f}, Validierungsverlust: {valid_loss:.4f}')"
|
|
||||||
   ]
  },
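  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The loop above references `model`, `criterion`, `optimizer`, `device`, `num_epochs`, `train_loader`, and `valid_loader` without defining them, and the notebook never executed it (no execution count). The cell below is a minimal, hypothetical setup that would have to run first: a plain feed-forward classifier sized to the 64 PCA components, trained with `CrossEntropyLoss` and `Adam`. Layer sizes, batch size, learning rate, and epoch count are assumptions, not values from the original notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical setup for the gradient-based training loop above (sketch only)\n",
    "import torch.nn as nn\n",
    "\n",
    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
    "\n",
    "genome_dataset = GenomeDataset(data_frame, n_pca_components=64)\n",
    "train_loader = DataLoader(genome_dataset.train_df, batch_size=32, shuffle=True)\n",
    "valid_loader = DataLoader(genome_dataset.val_df, batch_size=32, shuffle=False)\n",
    "\n",
    "num_classes = len(genome_dataset.label_encoder.classes_)\n",
    "model = nn.Sequential(\n",
    "    nn.Linear(64, 128),\n",
    "    nn.ReLU(),\n",
    "    nn.Linear(128, num_classes),\n",
    ").to(device)\n",
    "\n",
    "criterion = nn.CrossEntropyLoss()\n",
    "optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)\n",
    "num_epochs = 50"
   ]
  }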
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "rl",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.18"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}