New scripts for SQL file generation

Jan 2025-09-24 00:09:57 +02:00
parent f68a6b30a5
commit 2f7df47df4
6 changed files with 357 additions and 0 deletions


@@ -0,0 +1,164 @@
#!/usr/bin/env python3
"""
Excel-to-SQL converter for nodes (new format)
Converts nodes.xlsx into SQL INSERT statements
"""
import pandas as pd
import sys
from pathlib import Path


def parse_boolean_value(value):
    """
    Converts various boolean representations to a Python boolean
    """
    if pd.isna(value):
        return False
    str_value = str(value).lower().strip()
    return str_value in ['true', 'yes', '1', 'ja', 'wahr']


def escape_sql_string(value):
    """
    Escapes SQL strings for safe insertion
    """
    if pd.isna(value):
        return 'NULL'
    # Convert to string and escape single quotes
    str_value = str(value).replace("'", "''")
    return f"'{str_value}'"


def convert_nodes_to_sql(nodes_file, output_file):
    """
    Main function: converts nodes.xlsx into SQL INSERT statements
    """
    try:
        # Load nodes.xlsx
        nodes_df = pd.read_excel(nodes_file, sheet_name='Tabelle1')
        print(f"Successfully loaded {len(nodes_df)} nodes")
        # Show the detected columns for verification
        print(f"Detected columns: {nodes_df.columns.tolist()}")
    except Exception as e:
        print(f"Error while loading nodes.xlsx: {e}")
        sys.exit(1)

    # Open the output file
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write("-- Generated SQL INSERT statements for nodes\n")
            f.write("-- Generated from nodes.xlsx (new format)\n\n")

            # Iterate over all rows
            for index, row in nodes_df.iterrows():
                node_id = index + 1  # 1-based
                # Extract the data from the new format
                external_mapping_id = str(row['Mapping ID']).strip() if pd.notna(row['Mapping ID']) else None
                name = str(row['Name']).strip() if pd.notna(row['Name']) else None
                address = str(row['Address']).strip() if pd.notna(row['Address']) else None

                # ISO code taken directly from the column
                iso_code = str(row['Country (ISO 3166-1)']).strip() if pd.notna(row['Country (ISO 3166-1)']) else None

                # Latitude and longitude (already provided as separate columns)
                geo_lat = None
                geo_lng = None
                if pd.notna(row['Latitude']) and pd.notna(row['Longitude']):
                    try:
                        geo_lat = round(float(row['Latitude']), 4)
                        geo_lng = round(float(row['Longitude']), 4)
                    except ValueError:
                        print(f"Warning: invalid geo coordinates in row {node_id}")

                # Node types from separate columns
                is_source = parse_boolean_value(row['Source'])
                is_intermediate = parse_boolean_value(row['Intermediate'])
                is_destination = parse_boolean_value(row['Destination'])

                # Predecessor required (new column name)
                predecessor_required = parse_boolean_value(row['Predecessors mandatory'])

                # Validation: at least one node type should be set
                if not (is_source or is_intermediate or is_destination):
                    print(f"Warning: row {node_id} ({name}) has no node type set")

                # Debug output for verification
                print(f"Node {node_id}: {name}")
                print(f" - Source: {is_source}, Intermediate: {is_intermediate}, Destination: {is_destination}")
                print(f" - ISO Code: {iso_code}, Predecessor required: {predecessor_required}")
                # Write the SQL INSERT statement
                f.write(f"-- Node {node_id}: {name or 'Unknown'}\n")
                f.write("INSERT INTO node (\n")
                f.write(" id,\n")
                f.write(" country_id,\n")
                f.write(" name,\n")
                f.write(" address,\n")
                f.write(" external_mapping_id,\n")
                f.write(" predecessor_required,\n")
                f.write(" is_destination,\n")
                f.write(" is_source,\n")
                f.write(" is_intermediate,\n")
                f.write(" geo_lat,\n")
                f.write(" geo_lng\n")
                f.write(") VALUES (\n")
                f.write(f" {node_id},\n")
                f.write(f" (SELECT id FROM country WHERE iso_code = '{iso_code}'),\n")
                f.write(f" {escape_sql_string(name)},\n")
                f.write(f" {escape_sql_string(address)},\n")
                f.write(f" {escape_sql_string(external_mapping_id)},\n")
                f.write(f" {'true' if predecessor_required else 'false'},\n")
                f.write(f" {'true' if is_destination else 'false'},\n")
                f.write(f" {'true' if is_source else 'false'},\n")
                f.write(f" {'true' if is_intermediate else 'false'},\n")
                f.write(f" {geo_lat if geo_lat is not None else 'NULL'},\n")
                f.write(f" {geo_lng if geo_lng is not None else 'NULL'}\n")
                f.write(" );\n\n")

        print(f"\nSQL file successfully created: {output_file}")
        print(f"Generated a total of {len(nodes_df)} INSERT statements")
    except Exception as e:
        print(f"Error while writing the SQL file: {e}")
        sys.exit(1)


def main():
    """
    Main program
    """
    if len(sys.argv) < 2:
        print("Usage: python 03-excel_export__node_sql_extract.py <excel_file> [output_file]")
        print("")
        print("Examples:")
        print("  python 03-excel_export__node_sql_extract.py nodes.xlsx output.sql")
        print("  python 03-excel_export__node_sql_extract.py nodes.xlsx")
        return

    # Process the command line arguments
    nodes_file = sys.argv[1]
    # If no output file is given, derive one from the input name (assumed default)
    output_file = sys.argv[2] if len(sys.argv) > 2 else Path(nodes_file).stem + '.sql'

    # Check whether the input file exists
    if not Path(nodes_file).exists():
        print(f"Error: {nodes_file} not found!")
        print(f"Usage: {sys.argv[0]} [input.xlsx] [output.sql]")
        sys.exit(1)

    print(f"Converting {nodes_file} to SQL...")
    print(f"Output file: {output_file}")
    print("-" * 50)

    # Run the conversion
    convert_nodes_to_sql(nodes_file, output_file)

    print("-" * 50)
    print("Conversion finished successfully!")


if __name__ == "__main__":
    main()
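
For illustration, a minimal sketch of one statement this script would generate for a hypothetical row (the mapping ID N-001, the name, the address, and the DE ISO code are made-up values; the column order follows the f.write calls above):

-- Illustrative output (hypothetical values)
-- Node 1: Hamburg Plant
INSERT INTO node (
 id,
 country_id,
 name,
 address,
 external_mapping_id,
 predecessor_required,
 is_destination,
 is_source,
 is_intermediate,
 geo_lat,
 geo_lng
) VALUES (
 1,
 (SELECT id FROM country WHERE iso_code = 'DE'),
 'Hamburg Plant',
 'Musterstrasse 1, 20095 Hamburg',
 'N-001',
 false,
 false,
 true,
 false,
 53.5511,
 9.9937
 );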


@@ -0,0 +1,193 @@
import pandas as pd
import sys
from pathlib import Path


def convert_excel_to_sql(excel_file_path, output_file_path=None):
    """
    Converts an Excel file with node predecessor data into SQL statements.
    New format: 'Mapping ID' and 'Predecessor Nodes (Mapping ID)'

    Args:
        excel_file_path (str): Path to the Excel file
        output_file_path (str, optional): Path to the output SQL file.
            If None, it is generated automatically.
    """
    try:
        # Load the Excel file
        df = pd.read_excel(excel_file_path)

        # Clean up the column names (if necessary)
        df.columns = df.columns.str.strip()

        # Check for the expected columns - NEW FORMAT
        expected_columns = ['Mapping ID', 'Predecessor Nodes (Mapping ID)']
        if not all(col in df.columns for col in expected_columns):
            print("Error: expected columns not found.")
            print(f"Expected columns: {expected_columns}")
            print(f"Found columns: {list(df.columns)}")
            return

        # Determine the output file
        if output_file_path is None:
            output_file_path = Path(excel_file_path).stem + '_converted.sql'
        # Generate the SQL statements
        sql_statements = []

        # Add a header comment
        sql_statements.append("-- Automatically generated SQL statements for node predecessor chains")
        sql_statements.append("-- Generated from: " + str(excel_file_path))
        sql_statements.append("-- Format: multiple chains per node are possible (separated by ;)")
        sql_statements.append("")

        chain_counter = 1
        total_chains = 0

        for index, row in df.iterrows():
            node_id = row['Mapping ID']
            predecessor_nodes = row['Predecessor Nodes (Mapping ID)']

            # Skip empty rows
            if pd.isna(node_id) or str(node_id).strip() == '':
                continue

            # Skip rows without predecessors
            if pd.isna(predecessor_nodes) or str(predecessor_nodes).strip() == '':
                continue

            # Process the predecessor chains (multiple chains separated by ;)
            chains = str(predecessor_nodes).strip().split(';')

            for chain_idx, chain in enumerate(chains, 1):
                chain = chain.strip()
                if not chain:  # Skip empty chains
                    continue

                # Pre-nodes within the chain (separated by ,)
                pre_nodes = [node.strip() for node in chain.split(',') if node.strip()]
                if not pre_nodes:  # No valid pre-nodes
                    continue

                # Comment for the chain
                sql_statements.append(f"-- Predecessor chain {chain_counter}: {node_id} (chain {chain_idx} of {len(chains)})")
                sql_statements.append(f"-- Predecessors: {', '.join(pre_nodes)}")

                # Create the node predecessor chain
                sql_statements.append("INSERT INTO node_predecessor_chain (")
                sql_statements.append(" node_id")
                sql_statements.append(") VALUES (")
                sql_statements.append(f" (SELECT id FROM node WHERE external_mapping_id = '{node_id}')")
                sql_statements.append(");")
                sql_statements.append("")

                # Set a variable for the chain ID
                sql_statements.append(f"SET @chain_id_{chain_counter} = LAST_INSERT_ID();")
                sql_statements.append("")

                # Create the predecessor entries
                for sequence_number, pre_node in enumerate(pre_nodes, 1):
                    sql_statements.append("INSERT INTO node_predecessor_entry (")
                    sql_statements.append(" node_id,")
                    sql_statements.append(" node_predecessor_chain_id,")
                    sql_statements.append(" sequence_number")
                    sql_statements.append(") VALUES (")
                    sql_statements.append(f" (SELECT id FROM node WHERE external_mapping_id = '{pre_node}'),")
                    sql_statements.append(f" @chain_id_{chain_counter},")
                    sql_statements.append(f" {sequence_number}")
                    sql_statements.append(");")
                    sql_statements.append("")

                chain_counter += 1
                total_chains += 1

            sql_statements.append("")  # Extra blank line between different nodes
        # Write the SQL file
        with open(output_file_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(sql_statements))

        print(f"Conversion successful! SQL file created: {output_file_path}")
        print(f"Processed rows: {len(df)}")
        print(f"Generated chains: {total_chains}")

        # Print some statistics
        print("\nStatistics:")
        processed_nodes = 0
        for _, row in df.iterrows():
            if not pd.isna(row['Mapping ID']) and str(row['Mapping ID']).strip():
                if not pd.isna(row['Predecessor Nodes (Mapping ID)']) and str(row['Predecessor Nodes (Mapping ID)']).strip():
                    processed_nodes += 1
        print(f"Nodes with predecessors: {processed_nodes}")

    except FileNotFoundError:
        print(f"Error: file '{excel_file_path}' not found.")
    except Exception as e:
        print(f"Error while processing the file: {str(e)}")
        import traceback
        traceback.print_exc()


def analyze_excel_structure(excel_file_path):
    """
    Helper function for analyzing the Excel structure.
    Prints the first rows and column information.
    """
    try:
        df = pd.read_excel(excel_file_path)
        print("\n=== Excel file analysis ===")
        print(f"Columns: {list(df.columns)}")
        print(f"Number of rows: {len(df)}")
        print("\nFirst 5 rows:")
        print(df.head())

        # Analysis of the predecessor column
        if 'Predecessor Nodes (Mapping ID)' in df.columns:
            pred_col = df['Predecessor Nodes (Mapping ID)']
            non_empty = pred_col.dropna()
            if len(non_empty) > 0:
                print("\n=== Predecessor chains analysis ===")
                print(f"Rows with predecessors: {len(non_empty)}")

                # Examples of different chain structures
                print("\nExamples of predecessor structures:")
                for i, val in enumerate(non_empty.head(3)):
                    print(f" Example {i+1}: {val}")
                    chains = str(val).split(';')
                    print(f" -> {len(chains)} chain(s)")
                    for j, chain in enumerate(chains, 1):
                        nodes = [n.strip() for n in chain.split(',') if n.strip()]
                        print(f" Chain {j}: {len(nodes)} node(s): {nodes}")
    except Exception as e:
        print(f"Error during analysis: {str(e)}")


def main():
    """
    Main function for command line usage.
    """
    if len(sys.argv) < 2:
        print("Usage: python 04-excel_export__predecessor_sql_extract.py <excel_file> [output_file]")
        print("")
        print("Examples:")
        print("  python 04-excel_export__predecessor_sql_extract.py nodes.xlsx output.sql")
        print("  python 04-excel_export__predecessor_sql_extract.py nodes.xlsx")
        print("  python 04-excel_export__predecessor_sql_extract.py --analyze nodes.xlsx")
        return

    # Analysis mode
    if sys.argv[1] == '--analyze' and len(sys.argv) > 2:
        analyze_excel_structure(sys.argv[2])
        return

    excel_file = sys.argv[1]
    output_file = sys.argv[2] if len(sys.argv) > 2 else None

    convert_excel_to_sql(excel_file, output_file)


if __name__ == "__main__":
    main()

# Example of direct use from within the script:
# convert_excel_to_sql('nodes.xlsx', 'predecessor_chains.sql')
#
# Example of analyzing the Excel structure:
# analyze_excel_structure('nodes.xlsx')
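
As a sketch of the generated output: assuming a hypothetical 'Predecessor Nodes (Mapping ID)' cell containing 'A1,B2;C3' for a node with mapping ID 'X9' (all IDs invented for illustration), the script above would emit two chains. The first chain would look roughly like this; the second chain (for 'C3') repeats the same pattern with @chain_id_2:

-- Illustrative output (hypothetical IDs)
-- Predecessor chain 1: X9 (chain 1 of 2)
-- Predecessors: A1, B2
INSERT INTO node_predecessor_chain (
 node_id
) VALUES (
 (SELECT id FROM node WHERE external_mapping_id = 'X9')
);

SET @chain_id_1 = LAST_INSERT_ID();

INSERT INTO node_predecessor_entry (
 node_id,
 node_predecessor_chain_id,
 sequence_number
) VALUES (
 (SELECT id FROM node WHERE external_mapping_id = 'A1'),
 @chain_id_1,
 1
);

INSERT INTO node_predecessor_entry (
 node_id,
 node_predecessor_chain_id,
 sequence_number
) VALUES (
 (SELECT id FROM node WHERE external_mapping_id = 'B2'),
 @chain_id_1,
 2
);

Note that SET @chain_id_N and LAST_INSERT_ID() assume a MySQL/MariaDB-compatible target database.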