New scripts for SQL file generation
This commit is contained in:
parent
f68a6b30a5
commit
2f7df47df4
6 changed files with 357 additions and 0 deletions
|
|
@ -0,0 +1,164 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Excel zu SQL Konverter für Nodes (Neues Format)
|
||||
Konvertiert nodes.xlsx in SQL INSERT Statements
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
def parse_boolean_value(value):
    """Interpret assorted truthy spellings (English/German) as a Python bool.

    Missing values (NaN/None) count as False; anything else is matched
    case-insensitively against the accepted "true" spellings.
    """
    if pd.isna(value):
        return False

    normalized = str(value).lower().strip()
    return normalized in {'true', 'yes', '1', 'ja', 'wahr'}
|
||||
|
||||
def escape_sql_string(value):
    """Render *value* as a safely quoted SQL string literal.

    Missing values (NaN/None) become the bare keyword NULL; any embedded
    single quotes are doubled per standard SQL escaping rules.
    """
    if pd.isna(value):
        return 'NULL'

    # Doubling single quotes keeps the literal intact inside '...'
    escaped = str(value).replace("'", "''")
    return "'" + escaped + "'"
|
||||
|
||||
def convert_nodes_to_sql(nodes_file, output_file):
    """Convert nodes.xlsx (new format) into SQL INSERT statements.

    Reads worksheet 'Tabelle1' from *nodes_file* and writes one
    ``INSERT INTO node (...)`` statement per row to *output_file*.
    Exits the process with status 1 if reading or writing fails.

    Args:
        nodes_file: path to the input Excel workbook.
        output_file: path of the SQL file to create (overwritten).
    """

    try:
        # Load nodes.xlsx
        nodes_df = pd.read_excel(nodes_file, sheet_name='Tabelle1')
        print(f"Erfolgreich {len(nodes_df)} Nodes geladen")

        # Show detected columns for verification
        print(f"Gefundene Spalten: {nodes_df.columns.tolist()}")

    except Exception as e:
        print(f"Fehler beim Laden der nodes.xlsx: {e}")
        sys.exit(1)

    # Open the output file
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write("-- Generated SQL INSERT statements for nodes\n")
            f.write("-- Generated from nodes.xlsx (new format)\n\n")

            # Iterate over all rows
            for index, row in nodes_df.iterrows():
                node_id = index + 1  # 1-based

                # Extract data from the new format
                external_mapping_id = str(row['Mapping ID']).strip() if pd.notna(row['Mapping ID']) else None
                name = str(row['Name']).strip() if pd.notna(row['Name']) else None
                address = str(row['Address']).strip() if pd.notna(row['Address']) else None

                # ISO code directly from its column
                iso_code = str(row['Country (ISO 3166-1)']).strip() if pd.notna(row['Country (ISO 3166-1)']) else None

                # Latitude and longitude (already separate columns)
                geo_lat = None
                geo_lng = None
                if pd.notna(row['Latitude']) and pd.notna(row['Longitude']):
                    try:
                        geo_lat = round(float(row['Latitude']), 4)
                        geo_lng = round(float(row['Longitude']), 4)
                    except ValueError:
                        print(f"Warnung: Ungültige Geo-Koordinaten in Zeile {node_id}")

                # Node types from separate columns
                is_source = parse_boolean_value(row['Source'])
                is_intermediate = parse_boolean_value(row['Intermediate'])
                is_destination = parse_boolean_value(row['Destination'])

                # Predecessor required (new column label)
                predecessor_required = parse_boolean_value(row['Predecessors mandatory'])

                # Validation: at least one node type should be set
                if not (is_source or is_intermediate or is_destination):
                    print(f"Warnung: Zeile {node_id} ({name}) hat keinen Node-Typ gesetzt")

                # Debug output for verification
                print(f"Node {node_id}: {name}")
                print(f" - Source: {is_source}, Intermediate: {is_intermediate}, Destination: {is_destination}")
                print(f" - ISO Code: {iso_code}, Predecessor required: {predecessor_required}")

                # BUGFIX: iso_code used to be interpolated raw, so a missing
                # value produced WHERE iso_code = 'None' (matching nothing or
                # the wrong row) and an embedded single quote broke the
                # statement. Emit NULL for a missing ISO code and escape it
                # otherwise.
                if iso_code is None:
                    country_expr = 'NULL'
                else:
                    country_expr = f"(SELECT id FROM country WHERE iso_code = {escape_sql_string(iso_code)})"

                # Write the SQL INSERT statement
                f.write(f"-- Node {node_id}: {name or 'Unknown'}\n")
                f.write("INSERT INTO node (\n")
                f.write(" id,\n")
                f.write(" country_id,\n")
                f.write(" name,\n")
                f.write(" address,\n")
                f.write(" external_mapping_id,\n")
                f.write(" predecessor_required,\n")
                f.write(" is_destination,\n")
                f.write(" is_source,\n")
                f.write(" is_intermediate,\n")
                f.write(" geo_lat,\n")
                f.write(" geo_lng\n")
                f.write(") VALUES (\n")
                f.write(f" {node_id},\n")
                f.write(f" {country_expr},\n")
                f.write(f" {escape_sql_string(name)},\n")
                f.write(f" {escape_sql_string(address)},\n")
                f.write(f" {escape_sql_string(external_mapping_id)},\n")
                f.write(f" {'true' if predecessor_required else 'false'},\n")
                f.write(f" {'true' if is_destination else 'false'},\n")
                f.write(f" {'true' if is_source else 'false'},\n")
                f.write(f" {'true' if is_intermediate else 'false'},\n")
                f.write(f" {geo_lat if geo_lat is not None else 'NULL'},\n")
                f.write(f" {geo_lng if geo_lng is not None else 'NULL'}\n")
                f.write(" );\n\n")

        print(f"\nSQL-Datei erfolgreich erstellt: {output_file}")
        print(f"Insgesamt {len(nodes_df)} INSERT Statements generiert")

    except Exception as e:
        print(f"Fehler beim Schreiben der SQL-Datei: {e}")
        sys.exit(1)
|
||||
|
||||
def main():
    """Command-line entry point.

    Usage: ``python 03-excel_export__node_sql_extract.py <excel_file>
    [output_file]``. When the output file is omitted, it is derived
    from the input filename (``<stem>.sql``).
    """
    if len(sys.argv) < 2:
        print("Verwendung: python 03-excel_export__node_sql_extract.py <excel_datei> [ausgabe_datei]")
        print("")
        print("Beispiele:")
        print(" python 03-excel_export__node_sql_extract.py nodes.xlsx output.sql")
        print(" python 03-excel_export__node_sql_extract.py nodes.xlsx")
        return

    # Process command-line arguments.
    nodes_file = sys.argv[1]
    # BUGFIX: output_file was only assigned when a second argument was
    # given; with a single argument the later print/convert calls raised
    # NameError. Default to "<input stem>.sql" instead.
    output_file = sys.argv[2] if len(sys.argv) > 2 else Path(nodes_file).stem + '.sql'

    # Check that the input file exists
    if not Path(nodes_file).exists():
        print(f"Fehler: {nodes_file} nicht gefunden!")
        print(f"Verwendung: {sys.argv[0]} [input.xlsx] [output.sql]")
        sys.exit(1)

    print(f"Konvertiere {nodes_file} zu SQL...")
    print(f"Output-Datei: {output_file}")
    print("-" * 50)

    # Run the conversion
    convert_nodes_to_sql(nodes_file, output_file)

    print("-" * 50)
    print("Konvertierung erfolgreich abgeschlossen!")


if __name__ == "__main__":
    main()
|
||||
|
|
@ -0,0 +1,193 @@
|
|||
import pandas as pd
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
def convert_excel_to_sql(excel_file_path, output_file_path=None):
    """Convert an Excel file with node-predecessor data into SQL statements.

    New format: columns 'Mapping ID' and 'Predecessor Nodes (Mapping ID)'.
    Multiple chains per node are separated by ';', nodes within a chain
    by ','.

    Args:
        excel_file_path (str): path to the Excel file.
        output_file_path (str, optional): path of the output SQL file;
            auto-generated from the input name when None.
    """
    try:
        # Load the Excel file
        df = pd.read_excel(excel_file_path)

        # Normalize column names (strip stray whitespace)
        df.columns = df.columns.str.strip()

        # Verify the expected columns - NEW FORMAT
        expected_columns = ['Mapping ID', 'Predecessor Nodes (Mapping ID)']
        if not all(col in df.columns for col in expected_columns):
            print(f"Fehler: Erwartete Spalten nicht gefunden.")
            print(f"Erwartete Spalten: {expected_columns}")
            print(f"Gefundene Spalten: {list(df.columns)}")
            return

        # Determine the output file
        if output_file_path is None:
            output_file_path = Path(excel_file_path).stem + '_converted.sql'

        # Collect the generated SQL statements
        sql_statements = []

        # Header comment
        sql_statements.append("-- Automatisch generierte SQL-Statements für Node Predecessor Chains")
        sql_statements.append("-- Generiert aus: " + str(excel_file_path))
        sql_statements.append("-- Format: Mehrere Chains pro Node möglich (mit ; getrennt)")
        sql_statements.append("")

        chain_counter = 1
        total_chains = 0

        def _sql_quote(value):
            # BUGFIX: mapping IDs were previously interpolated raw into
            # '...' literals; an embedded single quote broke the generated
            # statement. Double quotes per SQL escaping rules.
            return "'" + str(value).replace("'", "''") + "'"

        for index, row in df.iterrows():
            node_id = row['Mapping ID']
            predecessor_nodes = row['Predecessor Nodes (Mapping ID)']

            # Skip empty rows
            if pd.isna(node_id) or str(node_id).strip() == '':
                continue

            # Skip rows without any predecessors
            if pd.isna(predecessor_nodes) or str(predecessor_nodes).strip() == '':
                continue

            # Multiple chains per node are separated by ';'
            chains = str(predecessor_nodes).strip().split(';')

            for chain_idx, chain in enumerate(chains, 1):
                chain = chain.strip()
                if not chain:  # skip empty chains
                    continue

                # Pre-nodes within the chain are separated by ','
                pre_nodes = [node.strip() for node in chain.split(',') if node.strip()]

                if not pre_nodes:  # no valid pre-nodes
                    continue

                # Comment describing this chain
                sql_statements.append(f"-- Predecessor Chain {chain_counter}: {node_id} (Chain {chain_idx} von {len(chains)})")
                sql_statements.append(f"-- Predecessors: {', '.join(pre_nodes)}")

                # Create the node predecessor chain
                sql_statements.append("INSERT INTO node_predecessor_chain (")
                sql_statements.append(" node_id")
                sql_statements.append(") VALUES (")
                sql_statements.append(f" (SELECT id FROM node WHERE external_mapping_id = {_sql_quote(node_id)})")
                sql_statements.append(");")
                sql_statements.append("")

                # Remember the new chain id in a session variable
                # (MySQL syntax: SET @var = LAST_INSERT_ID())
                sql_statements.append(f"SET @chain_id_{chain_counter} = LAST_INSERT_ID();")
                sql_statements.append("")

                # Create the predecessor entries
                for sequence_number, pre_node in enumerate(pre_nodes, 1):
                    sql_statements.append("INSERT INTO node_predecessor_entry (")
                    sql_statements.append(" node_id,")
                    sql_statements.append(" node_predecessor_chain_id,")
                    sql_statements.append(" sequence_number")
                    sql_statements.append(") VALUES (")
                    sql_statements.append(f" (SELECT id FROM node WHERE external_mapping_id = {_sql_quote(pre_node)}),")
                    sql_statements.append(f" @chain_id_{chain_counter},")
                    sql_statements.append(f" {sequence_number}")
                    sql_statements.append(");")
                    sql_statements.append("")

                chain_counter += 1
                total_chains += 1

            sql_statements.append("")  # extra blank line between nodes

        # Write the SQL file
        with open(output_file_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(sql_statements))

        print(f"Erfolgreich konvertiert! SQL-Datei erstellt: {output_file_path}")
        print(f"Verarbeitete Zeilen: {len(df)}")
        print(f"Generierte Chains: {total_chains}")

        # Print statistics
        print("\nStatistik:")
        processed_nodes = 0
        for _, row in df.iterrows():
            if not pd.isna(row['Mapping ID']) and str(row['Mapping ID']).strip():
                if not pd.isna(row['Predecessor Nodes (Mapping ID)']) and str(row['Predecessor Nodes (Mapping ID)']).strip():
                    processed_nodes += 1
        print(f"Nodes mit Predecessors: {processed_nodes}")

    except FileNotFoundError:
        print(f"Fehler: Datei '{excel_file_path}' nicht gefunden.")
    except Exception as e:
        print(f"Fehler beim Verarbeiten der Datei: {str(e)}")
        import traceback
        traceback.print_exc()
|
||||
|
||||
def analyze_excel_structure(excel_file_path):
    """Diagnostic helper that prints the structure of an Excel file.

    Shows the column names, the row count, a head() preview, and — when
    the predecessor column exists — a breakdown of up to three example
    chain structures.
    """
    try:
        frame = pd.read_excel(excel_file_path)
        print("\n=== Excel-Datei Analyse ===")
        print(f"Spalten: {list(frame.columns)}")
        print(f"Anzahl Zeilen: {len(frame)}")
        print("\nErste 5 Zeilen:")
        print(frame.head())

        # Inspect the predecessor column, if present
        if 'Predecessor Nodes (Mapping ID)' in frame.columns:
            populated = frame['Predecessor Nodes (Mapping ID)'].dropna()
            if len(populated) > 0:
                print("\n=== Predecessor Chains Analyse ===")
                print(f"Zeilen mit Predecessors: {len(populated)}")

                # Show a few examples of different chain structures
                print("\nBeispiele für Predecessor-Strukturen:")
                for sample_idx, sample in enumerate(populated.head(3)):
                    print(f" Beispiel {sample_idx+1}: {sample}")
                    chains = str(sample).split(';')
                    print(f" -> {len(chains)} Chain(s)")
                    for chain_no, chain in enumerate(chains, 1):
                        nodes = [n.strip() for n in chain.split(',') if n.strip()]
                        print(f" Chain {chain_no}: {len(nodes)} Node(s): {nodes}")

    except Exception as e:
        print(f"Fehler bei der Analyse: {str(e)}")
|
||||
|
||||
def main():
    """Entry point for command-line usage.

    With no arguments, prints usage. ``--analyze <file>`` runs the
    structure analysis; otherwise the first argument is the input Excel
    file and the optional second one the output SQL file.
    """
    args = sys.argv

    if len(args) < 2:
        print("Verwendung: python 04-excel_export__predecessor_sql_extract.py <excel_datei> [ausgabe_datei]")
        print("")
        print("Beispiele:")
        print(" python 04-excel_export__predecessor_sql_extract.py nodes.xlsx output.sql")
        print(" python 04-excel_export__predecessor_sql_extract.py nodes.xlsx")
        return

    # Analysis mode: --analyze <excel_file>
    if args[1] == '--analyze' and len(args) > 2:
        analyze_excel_structure(args[2])
        return

    excel_file = args[1]
    output_file = None if len(args) <= 2 else args[2]

    convert_excel_to_sql(excel_file, output_file)


if __name__ == "__main__":
    main()
|
||||
|
||||
# Example of direct use inside a script:
# convert_excel_to_sql('nodes.xlsx', 'predecessor_chains.sql')
#
# Example of analyzing the Excel structure:
# analyze_excel_structure('nodes.xlsx')
|
||||
Loading…
Add table
Reference in a new issue