Add Excel-to-SQL conversion scripts for nodes and node predecessor chains

Introduce Python scripts to convert node and node predecessor data from Excel files into SQL INSERT statements. The scripts support ISO code mappings, geo-position parsing, node type determination, and automated chain creation. Also add sample Excel files (`nodes.xlsx`, `pre_nodes.xlsx`) for testing and reference.
This commit is contained in:
Jan 2025-07-19 22:11:58 +02:00
parent a9275a012a
commit ad30f00492
4 changed files with 327 additions and 0 deletions

View file

@ -0,0 +1,211 @@
#!/usr/bin/env python3
"""
Excel zu SQL Konverter für Nodes
Konvertiert nodes.xlsx in SQL INSERT Statements unter Verwendung von countries.xlsx für ISO-Codes
"""
import pandas as pd
import sys
import re
from pathlib import Path
def load_countries_mapping(countries_file):
    """
    Build a country-name -> ISO-code lookup from the countries workbook.

    The first sheet of ``countries_file`` is read; its ``Country`` column
    supplies the keys and its ``Country code`` column the values. Both are
    converted to ``str`` and stripped of surrounding whitespace. If a name
    occurs more than once, the last row wins. A summary line is printed on
    success.

    Any exception raised while reading or processing the workbook is
    reported on stdout and terminates the process with exit status 1.
    """
    try:
        # sheet_name=0 selects the first sheet regardless of its title.
        sheet = pd.read_excel(countries_file, sheet_name=0)
        mapping = {
            str(record['Country']).strip(): str(record['Country code']).strip()
            for _, record in sheet.iterrows()
        }
        print(f"Erfolgreich {len(mapping)} Länder-Mappings geladen")
        return mapping
    except Exception as e:
        print(f"Fehler beim Laden der countries.xlsx: {e}")
        sys.exit(1)
def parse_geo_position(geo_str):
    """
    Parse a "lat, lng" cell into two floats rounded to 4 decimal places.

    Args:
        geo_str: Cell value expected to look like ``"48.1371, 11.5761"``.

    Returns:
        ``(lat, lng)`` on success. ``(None, None)`` when the value is
        missing or blank, does not consist of exactly two comma-separated
        parts, a part is not a number, or a part is not finite.
    """
    import math

    if pd.isna(geo_str):
        return None, None
    text = str(geo_str).strip()
    if not text:
        return None, None

    parts = text.split(',')
    if len(parts) != 2:
        return None, None

    try:
        # float() tolerates surrounding whitespace, so no per-part strip is needed.
        lat, lng = (float(part) for part in parts)
    except ValueError:
        return None, None

    # float() also accepts "nan" and "inf"; those are not usable coordinates and
    # would otherwise end up verbatim in the generated SQL.
    if not (math.isfinite(lat) and math.isfinite(lng)):
        return None, None

    return round(lat, 4), round(lng, 4)
def determine_node_type(type_str):
    """
    Derive the node role flags from the free-text "Type" cell.

    The text is lower-cased and searched for the substrings ``sink``,
    ``source`` and ``intermediate``; several may match at once.

    Args:
        type_str: Value of the "Type" cell.

    Returns:
        Tuple ``(is_destination, is_source, is_intermediate)``. A missing
        (NaN/None) cell yields ``(False, False, False)``.

    Raises:
        ValueError: The cell has a value but none of the known keywords
            occur in it.
    """
    if pd.isna(type_str):
        return False, False, False

    type_lower = str(type_str).lower().strip()

    is_destination = 'sink' in type_lower
    is_source = 'source' in type_lower
    is_intermediate = 'intermediate' in type_lower

    if not (is_destination or is_source or is_intermediate):
        # ValueError is a subclass of Exception, so existing
        # `except Exception` handlers keep working.
        raise ValueError(f"no node type in {type_lower} ")

    return is_destination, is_source, is_intermediate
def escape_sql_string(value):
    """
    Render a value as a SQL string literal.

    Args:
        value: Any scalar; it is converted with ``str()``.

    Returns:
        The text ``NULL`` (unquoted) for missing values (None/NaN),
        otherwise the value wrapped in single quotes with embedded single
        quotes doubled and backslashes doubled.
    """
    if pd.isna(value):
        return 'NULL'
    # Backslashes are doubled first so the quote handling below cannot be undone
    # by a preceding backslash. NOTE(review): this assumes the target server
    # treats backslash as an escape character inside string literals (the MySQL
    # default) - confirm NO_BACKSLASH_ESCAPES is not enabled.
    text = str(value).replace("\\", "\\\\").replace("'", "''")
    return f"'{text}'"
def _cell_text(value):
    """Return an Excel cell as stripped text, or None when the cell is empty."""
    if pd.isna(value):
        return None
    # A numeric column that contains blank cells is read as float, which would
    # render the ID 1001 as '1001.0' (and the flag 1 as '1.0').
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    return str(value).strip()


def _is_affirmative(value):
    """Return True when a cell holds one of the accepted "yes" spellings."""
    text = _cell_text(value)
    return text is not None and text.lower() in ('yes', 'true', '1', 'ja')


def convert_nodes_to_sql(nodes_file, countries_file, output_file):
    """
    Convert the nodes workbook into a file of SQL INSERT statements.

    Sheet ``Tabelle1`` of ``nodes_file`` is read and one ``INSERT INTO node``
    statement is produced per row, with IDs numbered from 1 in row order. The
    country is resolved through the name -> ISO-code mapping loaded from
    ``countries_file``.

    All statements are built in memory and written in a single step, so a
    failure while processing the rows never leaves a truncated ``output_file``
    behind.

    Args:
        nodes_file: Path of the nodes workbook.
        countries_file: Path of the countries workbook.
        output_file: Path of the SQL file to create (overwritten if present).

    The process exits with status 1 when a workbook cannot be loaded, a row
    references a country that is not in the mapping, a row cannot be processed,
    or the output file cannot be written.
    """
    country_mapping = load_countries_mapping(countries_file)

    try:
        nodes_df = pd.read_excel(nodes_file, sheet_name='Tabelle1')
        print(f"Erfolgreich {len(nodes_df)} Nodes geladen")
    except Exception as e:
        print(f"Fehler beim Laden der nodes.xlsx: {e}")
        sys.exit(1)

    statements = [
        "-- Generated SQL INSERT statements for nodes\n",
        "-- Generated from nodes.xlsx using countries.xlsx for ISO code mapping\n\n",
    ]

    try:
        # enumerate() keeps the IDs 1-based and contiguous even if the frame
        # does not carry the default 0..n-1 index.
        for node_id, (_, row) in enumerate(nodes_df.iterrows(), start=1):
            external_mapping_id = _cell_text(row['external mapping id'])
            name = _cell_text(row['Name'])
            address = _cell_text(row['Address'])
            # The column header in the workbook really has a trailing space.
            country_name = _cell_text(row['Country '])

            if not country_name or country_name not in country_mapping:
                print(f"FEHLER: Land '{country_name}' in Zeile {node_id} nicht in countries.xlsx gefunden!")
                print(f"Verfügbare Länder: {sorted(country_mapping.keys())}")
                # SystemExit is not an Exception subclass, so it passes the handler below.
                sys.exit(1)
            iso_code = country_mapping[country_name]

            geo_lat, geo_lng = parse_geo_position(row['Geo position'])
            is_destination, is_source, is_intermediate = determine_node_type(row['Type'])
            print(f"{node_id} {name}: {is_destination}, {is_source}, {is_intermediate}")

            predecessor_required = _is_affirmative(row['predecessor_required'])

            # Collapse whitespace so a line break inside the name cannot end the
            # SQL comment early and spill the rest of the name into the script.
            comment_name = ' '.join(name.split()) if name else 'Unknown'

            statements.extend([
                f"-- Node {node_id}: {comment_name}\n",
                "INSERT INTO node (\n",
                " id,\n",
                " country_id,\n",
                " name,\n",
                " address,\n",
                " external_mapping_id,\n",
                " predecessor_required,\n",
                " is_destination,\n",
                " is_source,\n",
                " is_intermediate,\n",
                " geo_lat,\n",
                " geo_lng\n",
                ") VALUES (\n",
                f" {node_id},\n",
                # The ISO code comes from a spreadsheet too, so it is quoted like
                # every other text value.
                f" (SELECT id FROM country WHERE iso_code = {escape_sql_string(iso_code)}),\n",
                f" {escape_sql_string(name)},\n",
                f" {escape_sql_string(address)},\n",
                f" {escape_sql_string(external_mapping_id)},\n",
                f" {'true' if predecessor_required else 'false'},\n",
                f" {'true' if is_destination else 'false'},\n",
                f" {'true' if is_source else 'false'},\n",
                f" {'true' if is_intermediate else 'false'},\n",
                f" {geo_lat if geo_lat is not None else 'NULL'},\n",
                f" {geo_lng if geo_lng is not None else 'NULL'}\n",
                " );\n\n",
            ])
    except Exception as e:
        print(f"Fehler beim Verarbeiten der Nodes: {e}")
        sys.exit(1)

    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.writelines(statements)
    except Exception as e:
        print(f"Fehler beim Schreiben der SQL-Datei: {e}")
        sys.exit(1)

    print(f"SQL-Datei erfolgreich erstellt: {output_file}")
    print(f"Insgesamt {len(nodes_df)} INSERT Statements generiert")
def main():
    """
    Script entry point: convert ``nodes.xlsx`` into ``03-nodes.sql``.

    Uses fixed file names relative to the current working directory. Exits
    with status 1 if ``nodes.xlsx`` or ``countries.xlsx`` does not exist;
    otherwise prints a short banner and runs the conversion.
    """
    nodes_file = 'nodes.xlsx'
    countries_file = 'countries.xlsx'
    output_file = '03-nodes.sql'

    # Both inputs must be present before any work starts; nodes is checked first.
    for required in (nodes_file, countries_file):
        if not Path(required).exists():
            print(f"Fehler: {required} nicht gefunden!")
            sys.exit(1)

    separator = "-" * 50
    print(f"Konvertiere {nodes_file} zu SQL...")
    print(f"Verwende {countries_file} für ISO-Code Mapping...")
    print(f"Output-Datei: {output_file}")
    print(separator)

    convert_nodes_to_sql(nodes_file, countries_file, output_file)

    print(separator)
    print("Konvertierung erfolgreich abgeschlossen!")


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,116 @@
import pandas as pd
import sys
from pathlib import Path
def _normalize_mapping_id(value):
    """Return a cell's external mapping ID as stripped text, or None if the cell is empty."""
    if pd.isna(value):
        return None
    # Numeric ID columns that contain blank cells are read as float, which would
    # turn the ID 1003 into '1003.0' and no longer match the node table.
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    text = str(value).strip()
    return text or None


def _sql_quote(text):
    """Quote text as a SQL string literal, doubling backslashes and single quotes."""
    # NOTE(review): backslash doubling assumes the server treats backslash as an
    # escape character in string literals (the MySQL default) - confirm.
    escaped = text.replace("\\", "\\\\").replace("'", "''")
    return f"'{escaped}'"


def convert_excel_to_sql(excel_file_path, output_file_path=None):
    """
    Convert a workbook of node predecessor data into SQL statements.

    Every row with a non-empty ``node`` cell produces one
    ``node_predecessor_chain`` insert, a ``SET @chain_id_<n> = LAST_INSERT_ID()``
    statement, and one ``node_predecessor_entry`` insert per non-empty
    ``Pre-node 1..3`` cell. Sequence numbers count only the non-empty cells,
    starting at 1. IDs are stripped of surrounding whitespace and quoted as
    SQL string literals.

    Args:
        excel_file_path (str): Path to the Excel file.
        output_file_path (str, optional): Path of the SQL file to write.
            When None, ``<stem of excel_file_path>_converted.sql`` is used.

    Returns:
        None. Missing columns, a missing input file and any other error are
        reported on stdout instead of being raised.
    """
    try:
        df = pd.read_excel(excel_file_path)

        # Normalise headers; str() keeps this from failing on non-text headers.
        df.columns = [str(col).strip() for col in df.columns]

        pre_node_columns = ['Pre-node 1', 'Pre-node 2', 'Pre-node 3']
        expected_columns = ['node'] + pre_node_columns
        if not all(col in df.columns for col in expected_columns):
            print(f"Fehler: Erwartete Spalten nicht gefunden. Gefundene Spalten: {list(df.columns)}")
            return

        if output_file_path is None:
            output_file_path = Path(excel_file_path).stem + '_converted.sql'

        sql_statements = [
            "-- Automatisch generierte SQL-Statements für Node Predecessor Chains",
            "-- Generiert aus: " + str(excel_file_path),
            "",
        ]

        chain_counter = 0
        for _, row in df.iterrows():
            node_id = _normalize_mapping_id(row['node'])
            if node_id is None:
                # Rows without a node ID do not start a chain.
                continue
            chain_counter += 1

            # Collapse whitespace so a line break in the ID cannot end the SQL
            # comment early.
            comment_id = ' '.join(node_id.split())
            sql_statements.extend([
                f"-- Predecessor Chain {chain_counter}: {comment_id}",
                "INSERT INTO node_predecessor_chain (",
                " node_id",
                ") VALUES (",
                f" (SELECT id FROM node WHERE external_mapping_id = {_sql_quote(node_id)})",
                " );",
                "",
                # Remember the generated chain ID for the entry inserts below.
                f"SET @chain_id_{chain_counter} = LAST_INSERT_ID();",
                "",
            ])

            sequence_number = 1
            for pre_node_col in pre_node_columns:
                pre_node_id = _normalize_mapping_id(row[pre_node_col])
                if pre_node_id is None:
                    continue
                sql_statements.extend([
                    "INSERT INTO node_predecessor_entry (",
                    " node_id,",
                    " node_predecessor_chain_id,",
                    " sequence_number",
                    ") VALUES (",
                    f" (SELECT id FROM node WHERE external_mapping_id = {_sql_quote(pre_node_id)}),",
                    f" @chain_id_{chain_counter},",
                    f" {sequence_number}",
                    " );",
                    "",
                ])
                sequence_number += 1

        with open(output_file_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(sql_statements))

        print(f"Erfolgreich konvertiert! SQL-Datei erstellt: {output_file_path}")
        print(f"Verarbeitete Zeilen: {len(df)}")
        print(f"Generierte Chains: {chain_counter}")
    except FileNotFoundError:
        print(f"Fehler: Datei '{excel_file_path}' nicht gefunden.")
    except Exception as e:
        print(f"Fehler beim Verarbeiten der Datei: {str(e)}")
def main():
    """
    Command-line entry point.

    Expects the Excel file as the first argument and an optional output
    file as the second. Without arguments a usage hint is printed and
    nothing is converted.
    """
    args = sys.argv[1:]
    if not args:
        print("Verwendung: python excel_to_sql.py <excel_datei> [ausgabe_datei]")
        print("Beispiel: python excel_to_sql.py pre_nodes.xlsx output.sql")
        return

    excel_file = args[0]
    # None lets the converter pick its own output name.
    output_file = args[1] if len(args) > 1 else None
    convert_excel_to_sql(excel_file, output_file)


if __name__ == "__main__":
    main()

# Example of calling the converter directly from a script:
# convert_excel_to_sql('pre_nodes.xlsx', 'predecessor_chains.sql')

Binary file not shown.

Binary file not shown.