Fix: More stable bulk geocoding. Added @Transactional to the outer bulk service call so that all database changes are rolled back if anything fails.

This commit is contained in:
Jan 2026-01-23 16:57:06 +01:00
parent 605bcfe0fc
commit 417221eca8
3 changed files with 196 additions and 62 deletions

View file

@ -49,6 +49,7 @@ public class BatchGeoApiService {
ArrayList<BulkInstruction<ExcelNode>> noGeo = new ArrayList<>(); ArrayList<BulkInstruction<ExcelNode>> noGeo = new ArrayList<>();
ArrayList<BulkInstruction<ExcelNode>> failedGeoLookups = new ArrayList<>(); ArrayList<BulkInstruction<ExcelNode>> failedGeoLookups = new ArrayList<>();
ArrayList<BulkInstruction<ExcelNode>> failedFuzzyGeoLookups = new ArrayList<>();
int totalSuccessful = 0; int totalSuccessful = 0;
for (var node : nodes) { for (var node : nodes) {
@ -57,7 +58,6 @@ public class BatchGeoApiService {
} }
} }
for (int currentBatch = 0; currentBatch < noGeo.size(); currentBatch += MAX_BATCH_SIZE) { for (int currentBatch = 0; currentBatch < noGeo.size(); currentBatch += MAX_BATCH_SIZE) {
int end = Math.min(currentBatch + MAX_BATCH_SIZE, noGeo.size()); int end = Math.min(currentBatch + MAX_BATCH_SIZE, noGeo.size());
var chunk = noGeo.subList(currentBatch, end); var chunk = noGeo.subList(currentBatch, end);
@ -67,34 +67,109 @@ public class BatchGeoApiService {
.toList()); .toList());
if (chunkResult.isPresent()) { if (chunkResult.isPresent()) {
var response = chunkResult.get();
totalSuccessful += chunkResult.get().getSummary().getSuccessfulRequests();
if (response.getSummary() != null && response.getSummary().getSuccessfulRequests() != null) {
totalSuccessful += response.getSummary().getSuccessfulRequests();
}
if (response.getBatchItems() == null || response.getBatchItems().isEmpty()) {
logger.warn("Batch response contains no items");
failedGeoLookups.addAll(chunk);
continue;
}
for (int itemIdx = 0; itemIdx < chunk.size(); itemIdx++) { for (int itemIdx = 0; itemIdx < chunk.size(); itemIdx++) {
var result = chunkResult.get().getBatchItems().get(itemIdx);
if (itemIdx >= response.getBatchItems().size()) {
logger.warn("BatchItems size mismatch at index {}", itemIdx);
failedGeoLookups.add(chunk.get(itemIdx));
continue;
}
var result = response.getBatchItems().get(itemIdx);
var node = chunk.get(itemIdx).getEntity(); var node = chunk.get(itemIdx).getEntity();
if (!result.getFeatures().isEmpty() &&
(result.getFeatures().getFirst().getProperties().getConfidence().equalsIgnoreCase("high") || if (result == null || result.getFeatures() == null || result.getFeatures().isEmpty()) {
result.getFeatures().getFirst().getProperties().getConfidence().equalsIgnoreCase("medium") || logger.warn("No geocoding result for address {}",
(result.getFeatures().getFirst().getProperties().getMatchCodes() != null && node.getAddress() != null ? node.getAddress() : "unknown");
result.getFeatures().getFirst().getProperties().getMatchCodes().stream().anyMatch(s -> s.equalsIgnoreCase("good"))))) { failedGeoLookups.add(chunk.get(itemIdx));
var geometry = result.getFeatures().getFirst().getGeometry(); continue;
var properties = result.getFeatures().getFirst().getProperties(); }
node.setGeoLng(BigDecimal.valueOf(geometry.getCoordinates().get(0)));
node.setGeoLat(BigDecimal.valueOf(geometry.getCoordinates().get(1))); var feature = result.getFeatures().getFirst();
node.setAddress(properties.getAddress().getFormattedAddress()); if (feature == null) {
node.setCountryId(IsoCode.valueOf(properties.getAddress().getCountryRegion().getIso())); logger.warn("Feature is null for address {}", node.getAddress());
} else { failedGeoLookups.add(chunk.get(itemIdx));
logger.warn("Geocoding failed for address {}", node.getAddress()); continue;
}
var properties = feature.getProperties();
if (properties == null) {
logger.warn("Properties is null for address {}", node.getAddress());
failedGeoLookups.add(chunk.get(itemIdx));
continue;
}
String confidence = properties.getConfidence();
boolean hasGoodConfidence = confidence != null &&
(confidence.equalsIgnoreCase("high") ||
confidence.equalsIgnoreCase("medium"));
boolean hasGoodMatchCode = properties.getMatchCodes() != null &&
properties.getMatchCodes().stream()
.anyMatch(s -> s != null && s.equalsIgnoreCase("good"));
if (hasGoodConfidence || hasGoodMatchCode) {
var geometry = feature.getGeometry();
if (geometry == null || geometry.getCoordinates() == null ||
geometry.getCoordinates().size() < 2) {
logger.warn("Invalid geometry for address {}", node.getAddress());
failedGeoLookups.add(chunk.get(itemIdx));
continue;
}
var coordinates = geometry.getCoordinates();
if (coordinates.get(0) == null || coordinates.get(1) == null) {
logger.warn("Null coordinates for address {}", node.getAddress());
failedGeoLookups.add(chunk.get(itemIdx));
continue;
}
node.setGeoLng(BigDecimal.valueOf(coordinates.get(0)));
node.setGeoLat(BigDecimal.valueOf(coordinates.get(1)));
if (properties.getAddress() != null &&
properties.getAddress().getFormattedAddress() != null) {
node.setAddress(properties.getAddress().getFormattedAddress());
}
if (properties.getAddress() != null &&
properties.getAddress().getCountryRegion() != null &&
properties.getAddress().getCountryRegion().getIso() != null) {
try {
node.setCountryId(IsoCode.valueOf(
properties.getAddress().getCountryRegion().getIso()));
} catch (IllegalArgumentException e) {
logger.warn("Invalid ISO code: {}",
properties.getAddress().getCountryRegion().getIso());
}
}
} else {
logger.warn("Geocoding failed for address {} (low confidence)",
node.getAddress());
failedGeoLookups.add(chunk.get(itemIdx)); failedGeoLookups.add(chunk.get(itemIdx));
//throw new ExcelValidationError("Unable to geocode " + node.getName() + ". Please check your address or enter geo position yourself.");
} }
} }
} else {
logger.warn("Batch request returned empty result");
failedGeoLookups.addAll(chunk);
} }
} }
// Second pass: fuzzy lookup with company name for failed addresses // Second pass: fuzzy lookup with company name for failed addresses
if (!failedGeoLookups.isEmpty()) { if (!failedGeoLookups.isEmpty()) {
logger.info("Retrying {} failed lookups with fuzzy search", failedGeoLookups.size()); logger.info("Retrying {} failed lookups with fuzzy search", failedGeoLookups.size());
@ -108,31 +183,52 @@ public class BatchGeoApiService {
&& !fuzzyResult.get().getResults().isEmpty()) { && !fuzzyResult.get().getResults().isEmpty()) {
var result = fuzzyResult.get().getResults().getFirst(); var result = fuzzyResult.get().getResults().getFirst();
if (result == null) {
// Score >= 0.7 means good confidence (1.0 = perfect match) logger.warn("Fuzzy result is null for: {}", node.getName());
if (result.getScore() >= 7.0) { failedFuzzyGeoLookups.add(instruction);
node.setGeoLat(BigDecimal.valueOf(result.getPosition().getLat())); continue;
node.setGeoLng(BigDecimal.valueOf(result.getPosition().getLon()));
node.setAddress(result.getAddress().getFreeformAddress());
// Update country if it differs
if (result.getAddress().getCountryCode() != null) {
try {
node.setCountryId(IsoCode.valueOf(result.getAddress().getCountryCode()));
} catch (IllegalArgumentException e) {
logger.warn("Unknown country code: {}", result.getAddress().getCountryCode());
}
}
fuzzySuccessful++;
logger.info("Fuzzy search successful for: {} (score: {})",
node.getName(), result.getScore());
} else {
logger.warn("Fuzzy search returned low confidence result for: {} (score: {})",
node.getName(), result.getScore());
} }
} else {
logger.error("Fuzzy search found no results for: {}", node.getName()); double score = result.getScore();
if (score < 7.0) {
logger.warn("Fuzzy search returned low confidence result for: {} (score: {})",
node.getName(), score);
failedFuzzyGeoLookups.add(instruction);
continue;
}
if (result.getPosition() == null) {
logger.warn("Position is null for: {}", node.getName());
failedFuzzyGeoLookups.add(instruction);
continue;
}
double lat = result.getPosition().getLat();
double lon = result.getPosition().getLon();
node.setGeoLat(BigDecimal.valueOf(lat));
node.setGeoLng(BigDecimal.valueOf(lon));
if (result.getAddress() != null &&
result.getAddress().getFreeformAddress() != null) {
node.setAddress(result.getAddress().getFreeformAddress());
}
if (result.getAddress() != null &&
result.getAddress().getCountryCode() != null) {
try {
node.setCountryId(IsoCode.valueOf(result.getAddress().getCountryCode()));
} catch (IllegalArgumentException e) {
logger.warn("Unknown country code: {}",
result.getAddress().getCountryCode());
failedFuzzyGeoLookups.add(instruction);
continue;
}
}
fuzzySuccessful++;
logger.info("Fuzzy search successful for: {} (score: {})",
node.getName(), score);
} }
} }
@ -140,8 +236,10 @@ public class BatchGeoApiService {
fuzzySuccessful, failedGeoLookups.size()); fuzzySuccessful, failedGeoLookups.size());
// Throw error for remaining failed lookups // Throw error for remaining failed lookups
int remainingFailed = failedGeoLookups.size() - fuzzySuccessful; if (!failedFuzzyGeoLookups.isEmpty()) {
if (remainingFailed > 0) {
failedFuzzyGeoLookups.forEach(instruction -> {logger.warn("Lookup finally failed for: {}", instruction.getEntity().getName());});
var firstFailed = failedGeoLookups.stream() var firstFailed = failedGeoLookups.stream()
.filter(i -> i.getEntity().getGeoLat() == null) .filter(i -> i.getEntity().getGeoLat() == null)
.findFirst() .findFirst()
@ -149,7 +247,9 @@ public class BatchGeoApiService {
.orElse(null); .orElse(null);
if (firstFailed != null) { if (firstFailed != null) {
throw new ExcelValidationError("Unable to geocode " + firstFailed.getName() String name = firstFailed.getName() != null ?
firstFailed.getName() : "unknown";
throw new ExcelValidationError("Unable to geocode " + name
+ ". Please check your address or enter geo position yourself."); + ". Please check your address or enter geo position yourself.");
} }
} }
@ -159,13 +259,32 @@ public class BatchGeoApiService {
private Optional<FuzzySearchResponse> executeFuzzySearch(ExcelNode node) { private Optional<FuzzySearchResponse> executeFuzzySearch(ExcelNode node) {
try { try {
String companyName = node.getName(); String companyName = node.getName();
String country = node.getCountryId().name(); if (companyName == null) {
logger.warn("Company name is null for fuzzy search");
return Optional.empty();
}
IsoCode countryId = node.getCountryId();
if (countryId == null) {
logger.warn("Country ID is null for fuzzy search: {}", companyName);
return Optional.empty();
}
String country = countryId.name();
String address = node.getAddress();
if (address == null) {
logger.warn("Address is null for fuzzy search: {}", companyName);
address = ""; // Fallback zu leerem String
}
// Normalisiere Unicode für konsistente Suche // Normalisiere Unicode für konsistente Suche
companyName = java.text.Normalizer.normalize(companyName, java.text.Normalizer.Form.NFC); companyName = java.text.Normalizer.normalize(companyName,
java.text.Normalizer.Form.NFC);
// URL-Encoding // URL-Encoding
String encodedQuery = URLEncoder.encode(companyName + ", " + node.getAddress() + ", " + country, StandardCharsets.UTF_8); String encodedQuery = URLEncoder.encode(
companyName + ", " + address + ", " + country,
StandardCharsets.UTF_8);
String url = String.format( String url = String.format(
"https://atlas.microsoft.com/search/fuzzy/json?api-version=1.0&subscription-key=%s&query=%s&limit=5", "https://atlas.microsoft.com/search/fuzzy/json?api-version=1.0&subscription-key=%s&query=%s&limit=5",
@ -185,13 +304,21 @@ public class BatchGeoApiService {
return Optional.ofNullable(response.getBody()); return Optional.ofNullable(response.getBody());
} catch (Exception e) { } catch (Exception e) {
logger.error("Fuzzy search failed for {}", node.getName(), e); logger.error("Fuzzy search failed for {}",
node.getName() != null ? node.getName() : "unknown", e);
return Optional.empty(); return Optional.empty();
} }
} }
private String getGeoCodeString(ExcelNode excelNode) { private String getGeoCodeString(ExcelNode excelNode) {
return excelNode.getAddress() + ", " + excelNode.getCountryId(); String address = excelNode.getAddress();
IsoCode countryId = excelNode.getCountryId();
// Fallback-Werte für null
String addressStr = address != null ? address : "";
String countryStr = countryId != null ? countryId.name() : "";
return addressStr + ", " + countryStr;
} }
private Optional<BatchGeocodingResponse> executeBatchRequest(List<BatchItem> batchItems) { private Optional<BatchGeocodingResponse> executeBatchRequest(List<BatchItem> batchItems) {

View file

@ -15,6 +15,7 @@ import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.IOException; import java.io.IOException;
@ -56,6 +57,7 @@ public class BulkImportService {
this.materialFastExcelMapper = materialFastExcelMapper; this.materialFastExcelMapper = materialFastExcelMapper;
} }
@Transactional
public void processOperation(BulkOperation op) throws IOException { public void processOperation(BulkOperation op) throws IOException {
var file = op.getFile(); var file = op.getFile();
var type = op.getFileType(); var type = op.getFileType();

View file

@ -9,6 +9,7 @@ import de.avatic.lcc.service.transformer.generic.NodeTransformer;
import de.avatic.lcc.util.exception.internalerror.ExcelValidationError; import de.avatic.lcc.util.exception.internalerror.ExcelValidationError;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.math.BigDecimal;
import java.util.*; import java.util.*;
@Service @Service
@ -61,22 +62,26 @@ public class NodeBulkImportService {
} }
private boolean compare(Node updateNode, Node currentNode) { private boolean compare(Node updateNode, Node currentNode) {
return Objects.equals(updateNode.getName(), currentNode.getName()) &&
return updateNode.getName().equals(currentNode.getName()) && compareBigDecimal(updateNode.getGeoLat(), currentNode.getGeoLat()) &&
updateNode.getGeoLat().compareTo(currentNode.getGeoLat()) == 0 && compareBigDecimal(updateNode.getGeoLng(), currentNode.getGeoLng()) &&
updateNode.getGeoLng().compareTo(currentNode.getGeoLng()) == 0 && Objects.equals(updateNode.getExternalMappingId(), currentNode.getExternalMappingId()) &&
updateNode.getExternalMappingId().equals(currentNode.getExternalMappingId()) && Objects.equals(updateNode.getCountryId(), currentNode.getCountryId()) &&
updateNode.getCountryId().equals(currentNode.getCountryId()) && Objects.equals(updateNode.getIntermediate(), currentNode.getIntermediate()) &&
updateNode.getIntermediate().equals(currentNode.getIntermediate()) && Objects.equals(updateNode.getDestination(), currentNode.getDestination()) &&
updateNode.getDestination().equals(currentNode.getDestination()) && Objects.equals(updateNode.getSource(), currentNode.getSource()) &&
updateNode.getSource().equals(currentNode.getSource()) && Objects.equals(updateNode.getAddress(), currentNode.getAddress()) &&
updateNode.getAddress().equals(currentNode.getAddress()) && Objects.equals(updateNode.getDeprecated(), currentNode.getDeprecated()) &&
updateNode.getDeprecated().equals(currentNode.getDeprecated()) && Objects.equals(updateNode.getId(), currentNode.getId()) &&
updateNode.getId().equals(currentNode.getId()) && Objects.equals(updateNode.getPredecessorRequired(), currentNode.getPredecessorRequired()) &&
updateNode.getPredecessorRequired().equals(currentNode.getPredecessorRequired()) &&
compare(updateNode.getNodePredecessors(), currentNode.getNodePredecessors()) && compare(updateNode.getNodePredecessors(), currentNode.getNodePredecessors()) &&
compare(updateNode.getOutboundCountries(), currentNode.getOutboundCountries()); compare(updateNode.getOutboundCountries(), currentNode.getOutboundCountries());
}
private boolean compareBigDecimal(BigDecimal a, BigDecimal b) {
if (a == null && b == null) return true;
if (a == null || b == null) return false;
return a.compareTo(b) == 0;
} }
private boolean compare(Collection<Integer> outbound1, Collection<Integer> outbound2) { private boolean compare(Collection<Integer> outbound1, Collection<Integer> outbound2) {