#!/usr/bin/env python3
# =============================================================================
# NOTE: this text was a git patch that adds two independent scripts and two
# binary workbooks. The scripts follow one after the other; each banner names
# the file the code below it belongs to.
#
# File 1 of 2:
#   src/main/resources/master_data/excel_to_sql_converter_nodes.py
# =============================================================================
"""
Excel-to-SQL converter for nodes.

Converts nodes.xlsx into SQL INSERT statements, using countries.xlsx to
resolve country names to ISO codes.
"""

import math
import pandas as pd
import sys
import re
from pathlib import Path

# Columns nodes.xlsx must provide (names after stripping header whitespace).
_REQUIRED_NODE_COLUMNS = (
    'external mapping id',
    'Name',
    'Address',
    'Country',
    'Geo position',
    'Type',
    'predecessor_required',
)

# Cell values (lower-cased) that switch the predecessor_required flag on.
_TRUTHY_CELL_VALUES = ('yes', 'true', '1', 'ja')


def _cell_to_text(value):
    """Return a spreadsheet cell as stripped text, or None if it is empty."""
    if pd.isna(value):
        return None
    # pandas reads an integer column that contains blanks as floats; render
    # 42.0 as "42" so identifiers keep the form they have in the workbook.
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    return str(value).strip()


def _comment_text(value):
    """Collapse *value* onto one line so it stays inside a ``--`` comment."""
    return '' if value is None else ' '.join(str(value).split())


def _sql_bool(flag):
    """Render a Python truth value as an SQL boolean literal."""
    return 'true' if flag else 'false'


def _sql_number(value):
    """Render a number as SQL, mapping None to NULL."""
    return 'NULL' if value is None else str(value)


def load_countries_mapping(countries_file):
    """
    Load the country-name -> ISO-code mapping from the countries workbook.

    Reads the first sheet of ``countries_file`` and uses its ``Country`` and
    ``Country code`` columns. Rows in which either cell is empty are skipped.

    Args:
        countries_file: Path of the countries workbook.

    Returns:
        dict mapping the stripped country name to the stripped ISO code.

    Exits the process with status 1 if the workbook cannot be read or one of
    the two columns is missing.
    """
    try:
        # Treat only genuinely empty cells as missing. With pandas' default
        # NA list the ISO code "NA" (Namibia) would be parsed as NaN.
        countries_df = pd.read_excel(
            countries_file,
            sheet_name=0,
            keep_default_na=False,
            na_values=[''],
        )

        country_mapping = {}
        for _, row in countries_df.iterrows():
            country_name = _cell_to_text(row['Country'])
            iso_code = _cell_to_text(row['Country code'])
            if not country_name or not iso_code:
                continue
            country_mapping[country_name] = iso_code

        print(f"Erfolgreich {len(country_mapping)} Länder-Mappings geladen")
        return country_mapping

    except Exception as e:
        print(f"Fehler beim Laden der countries.xlsx: {e}")
        sys.exit(1)


def parse_geo_position(geo_str):
    """
    Parse a ``"lat, lng"`` cell into two floats rounded to 4 decimals.

    Args:
        geo_str: Cell value; may be NaN/None or an arbitrary string.

    Returns:
        ``(lat, lng)``, or ``(None, None)`` when the cell is empty, does not
        consist of exactly two comma-separated numbers, or contains a
        non-finite value (nan/inf), which could not be written as SQL.
    """
    if pd.isna(geo_str):
        return None, None

    parts = str(geo_str).strip().split(',')
    if len(parts) != 2:
        return None, None

    try:
        lat = float(parts[0].strip())
        lng = float(parts[1].strip())
    except ValueError:
        return None, None

    if not (math.isfinite(lat) and math.isfinite(lng)):
        return None, None

    return round(lat, 4), round(lng, 4)


def determine_node_type(type_str):
    """
    Derive the node-type flags from the "Type" cell.

    The lower-cased text is searched for the keywords ``sink``, ``source``
    and ``intermediate``; several may apply at once.

    Args:
        type_str: Cell value; NaN/None means "no type given".

    Returns:
        ``(is_destination, is_source, is_intermediate)``; all False for an
        empty cell.

    Raises:
        ValueError: The cell is not empty but contains none of the keywords.
    """
    if pd.isna(type_str):
        return False, False, False

    type_lower = str(type_str).lower().strip()

    is_destination = 'sink' in type_lower
    is_source = 'source' in type_lower
    is_intermediate = 'intermediate' in type_lower

    if not (is_destination or is_source or is_intermediate):
        raise ValueError(f"no node type in {type_lower} ")

    return is_destination, is_source, is_intermediate


def escape_sql_string(value):
    """
    Render a value as an SQL string literal.

    NaN/None becomes ``NULL``; anything else is converted with ``str`` and
    wrapped in single quotes, with embedded single quotes doubled.

    NOTE(review): backslashes are passed through unchanged. If the target
    database treats backslash as an escape character inside string literals,
    values containing one need extra handling - confirm against the target.
    """
    if pd.isna(value):
        return 'NULL'

    str_value = str(value).replace("'", "''")
    return f"'{str_value}'"


def _render_node_insert(node_id, iso_code, name, address, external_mapping_id,
                        predecessor_required, is_destination, is_source,
                        is_intermediate, geo_lat, geo_lng):
    """Return the commented INSERT statement for one node."""
    country_lookup = (
        "(SELECT id FROM country WHERE iso_code = "
        f"{escape_sql_string(iso_code)})"
    )
    columns = [
        ("id", str(node_id)),
        ("country_id", country_lookup),
        ("name", escape_sql_string(name)),
        ("address", escape_sql_string(address)),
        ("external_mapping_id", escape_sql_string(external_mapping_id)),
        ("predecessor_required", _sql_bool(predecessor_required)),
        ("is_destination", _sql_bool(is_destination)),
        ("is_source", _sql_bool(is_source)),
        ("is_intermediate", _sql_bool(is_intermediate)),
        ("geo_lat", _sql_number(geo_lat)),
        ("geo_lng", _sql_number(geo_lng)),
    ]
    lines = [
        f"-- Node {node_id}: {_comment_text(name) or 'Unknown'}",
        "INSERT INTO node (",
        ",\n".join(f" {column}" for column, _ in columns),
        ") VALUES (",
        ",\n".join(f" {value}" for _, value in columns),
        " );",
    ]
    return "\n".join(lines) + "\n\n"


def _build_node_statements(nodes_df, country_mapping):
    """Return the SQL text chunks (header plus one INSERT per row)."""
    chunks = [
        "-- Generated SQL INSERT statements for nodes\n",
        "-- Generated from nodes.xlsx using countries.xlsx for ISO code mapping\n\n",
    ]

    # Node ids are the 1-based position of the row in the sheet.
    for node_id, (_, row) in enumerate(nodes_df.iterrows(), start=1):
        external_mapping_id = _cell_to_text(row['external mapping id'])
        name = _cell_to_text(row['Name'])
        address = _cell_to_text(row['Address'])
        country_name = _cell_to_text(row['Country'])

        # Every node must reference a country known to countries.xlsx.
        if not country_name or country_name not in country_mapping:
            print(f"FEHLER: Land '{country_name}' in Zeile {node_id} nicht in countries.xlsx gefunden!")
            print(f"Verfügbare Länder: {sorted(country_mapping)}")
            sys.exit(1)

        iso_code = country_mapping[country_name]
        geo_lat, geo_lng = parse_geo_position(row['Geo position'])

        try:
            is_destination, is_source, is_intermediate = determine_node_type(row['Type'])
        except ValueError as e:
            print(f"FEHLER: Ungültiger Typ in Zeile {node_id}: {e}")
            sys.exit(1)

        print(f"{node_id} {name}: {is_destination}, {is_source}, {is_intermediate}")

        pred_text = _cell_to_text(row['predecessor_required'])
        predecessor_required = (
            pred_text is not None and pred_text.lower() in _TRUTHY_CELL_VALUES
        )

        chunks.append(_render_node_insert(
            node_id, iso_code, name, address, external_mapping_id,
            predecessor_required, is_destination, is_source, is_intermediate,
            geo_lat, geo_lng,
        ))

    return chunks


def convert_nodes_to_sql(nodes_file, countries_file, output_file):
    """
    Convert the nodes workbook into a file of SQL INSERT statements.

    All statements are built in memory first and written in one go, so an
    invalid row never leaves a truncated SQL file behind.

    Args:
        nodes_file: Path of the nodes workbook (sheet ``Tabelle1`` is read).
        countries_file: Path of the countries workbook used for ISO codes.
        output_file: Path of the SQL file to create (UTF-8, overwritten).

    Exits the process with status 1 on any error (unreadable workbook,
    missing column, unknown country, unknown node type, write failure).
    """
    country_mapping = load_countries_mapping(countries_file)

    try:
        nodes_df = pd.read_excel(nodes_file, sheet_name='Tabelle1')
        print(f"Erfolgreich {len(nodes_df)} Nodes geladen")
    except Exception as e:
        print(f"Fehler beim Laden der nodes.xlsx: {e}")
        sys.exit(1)

    # The country header in the workbook carries a trailing space
    # ("Country "); strip all headers so lookups do not depend on that.
    nodes_df.columns = [str(column).strip() for column in nodes_df.columns]
    missing = [c for c in _REQUIRED_NODE_COLUMNS if c not in nodes_df.columns]
    if missing:
        print(f"Fehler: Erwartete Spalten nicht gefunden: {missing}. Gefundene Spalten: {list(nodes_df.columns)}")
        sys.exit(1)

    try:
        chunks = _build_node_statements(nodes_df, country_mapping)
    except Exception as e:
        print(f"Fehler beim Verarbeiten der nodes.xlsx: {e}")
        sys.exit(1)

    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(''.join(chunks))
    except OSError as e:
        print(f"Fehler beim Schreiben der SQL-Datei: {e}")
        sys.exit(1)

    print(f"SQL-Datei erfolgreich erstellt: {output_file}")
    print(f"Insgesamt {len(nodes_df)} INSERT Statements generiert")


def main():
    """
    Script entry point: convert ./nodes.xlsx into ./03-nodes.sql.

    Uses fixed file names relative to the current working directory and
    exits with status 1 if one of the input workbooks is missing.
    """
    nodes_file = 'nodes.xlsx'
    countries_file = 'countries.xlsx'
    output_file = '03-nodes.sql'

    for required_file in (nodes_file, countries_file):
        if not Path(required_file).exists():
            print(f"Fehler: {required_file} nicht gefunden!")
            sys.exit(1)

    print(f"Konvertiere {nodes_file} zu SQL...")
    print(f"Verwende {countries_file} für ISO-Code Mapping...")
    print(f"Output-Datei: {output_file}")
    print("-" * 50)

    convert_nodes_to_sql(nodes_file, countries_file, output_file)

    print("-" * 50)
    print("Konvertierung erfolgreich abgeschlossen!")


if __name__ == "__main__":
    main()


# =============================================================================
# File 2 of 2:
#   src/main/resources/master_data/excel_to_sql_converter_pre_nodes.py
# =============================================================================
import pandas as pd
import sys
from pathlib import Path

_PRE_NODE_COLUMNS = ('Pre-node 1', 'Pre-node 2', 'Pre-node 3')
_EXPECTED_COLUMNS = ('node',) + _PRE_NODE_COLUMNS


def _cell_to_text(value):
    """Return a spreadsheet cell as stripped text, or None if it is empty."""
    if pd.isna(value):
        return None
    # pandas reads an integer column that contains blanks as floats; render
    # 42.0 as "42" so the id matches node.external_mapping_id.
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    return str(value).strip()


def _comment_text(value):
    """Collapse *value* onto one line so it stays inside a ``--`` comment."""
    return '' if value is None else ' '.join(str(value).split())


def _sql_literal(text):
    """Quote *text* as an SQL string literal, doubling single quotes."""
    return "'" + text.replace("'", "''") + "'"


def _node_lookup(mapping_id):
    """Return the sub-select resolving an external mapping id to a node id."""
    return (
        "(SELECT id FROM node WHERE external_mapping_id = "
        f"{_sql_literal(mapping_id)})"
    )


def _build_chain_statements(df, source_label):
    """Return ``(sql_lines, chain_count)`` for all non-empty rows of *df*."""
    statements = [
        "-- Automatisch generierte SQL-Statements für Node Predecessor Chains",
        "-- Generiert aus: " + str(source_label),
        "",
    ]
    chain_counter = 0

    for _, row in df.iterrows():
        node_id = _cell_to_text(row['node'])
        if not node_id:
            # Skip rows without a node id.
            continue

        chain_counter += 1
        chain_variable = f"@chain_id_{chain_counter}"

        statements += [
            f"-- Predecessor Chain {chain_counter}: {_comment_text(node_id)}",
            "INSERT INTO node_predecessor_chain (",
            " node_id",
            ") VALUES (",
            f" {_node_lookup(node_id)}",
            " );",
            "",
            # Remember the generated chain id for the entries that follow.
            f"SET {chain_variable} = LAST_INSERT_ID();",
            "",
        ]

        # The sequence number counts only predecessors that are present.
        sequence_number = 1
        for column in _PRE_NODE_COLUMNS:
            pre_node = _cell_to_text(row[column])
            if not pre_node:
                continue
            statements += [
                "INSERT INTO node_predecessor_entry (",
                " node_id,",
                " node_predecessor_chain_id,",
                " sequence_number",
                ") VALUES (",
                f" {_node_lookup(pre_node)},",
                f" {chain_variable},",
                f" {sequence_number}",
                " );",
                "",
            ]
            sequence_number += 1

    return statements, chain_counter


def convert_excel_to_sql(excel_file_path, output_file_path=None):
    """
    Convert a workbook of node-predecessor data into SQL statements.

    The first sheet must contain the columns ``node``, ``Pre-node 1``,
    ``Pre-node 2`` and ``Pre-node 3`` (header whitespace is ignored). Each
    row with a node id yields one ``node_predecessor_chain`` insert plus one
    ``node_predecessor_entry`` insert per non-empty predecessor cell.

    Args:
        excel_file_path (str): Path of the workbook.
        output_file_path (str, optional): Path of the SQL file to write.
            If None, ``<workbook stem>_converted.sql`` is used.

    Returns:
        bool: True if the SQL file was written, False if an error was
        reported on stdout.
    """
    try:
        df = pd.read_excel(excel_file_path)
    except FileNotFoundError:
        print(f"Fehler: Datei '{excel_file_path}' nicht gefunden.")
        return False
    except Exception as e:
        print(f"Fehler beim Verarbeiten der Datei: {str(e)}")
        return False

    # Normalise header names so stray whitespace does not break lookups.
    df.columns = [str(column).strip() for column in df.columns]

    if not all(column in df.columns for column in _EXPECTED_COLUMNS):
        print(f"Fehler: Erwartete Spalten nicht gefunden. Gefundene Spalten: {list(df.columns)}")
        return False

    if output_file_path is None:
        output_file_path = Path(excel_file_path).stem + '_converted.sql'

    try:
        sql_statements, chain_count = _build_chain_statements(df, excel_file_path)
        with open(output_file_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(sql_statements))
    except Exception as e:
        print(f"Fehler beim Verarbeiten der Datei: {str(e)}")
        return False

    print(f"Erfolgreich konvertiert! SQL-Datei erstellt: {output_file_path}")
    print(f"Verarbeitete Zeilen: {len(df)}")
    print(f"Generierte Chains: {chain_count}")
    return True


def main():
    """
    Command-line entry point.

    Usage: ``<script> <excel_file> [output_file]``. Exits with status 1 when
    the required argument is missing or the conversion fails.
    """
    if len(sys.argv) < 2:
        print("Verwendung: python excel_to_sql_converter_pre_nodes.py <excel_datei> [ausgabe_datei]")
        print("Beispiel: python excel_to_sql_converter_pre_nodes.py pre_nodes.xlsx output.sql")
        sys.exit(1)

    excel_file = sys.argv[1]
    output_file = sys.argv[2] if len(sys.argv) > 2 else None

    if not convert_excel_to_sql(excel_file, output_file):
        sys.exit(1)


if __name__ == "__main__":
    main()

# Example of calling the converter directly from another script:
# convert_excel_to_sql('pre_nodes.xlsx', 'predecessor_chains.sql')

# The same patch also adds two binary workbooks, which cannot be shown as text:
#   src/main/resources/master_data/nodes.xlsx
#   src/main/resources/master_data/pre_nodes.xlsx