Fix: Make bulk geocoding more stable. Added @Transactional to the outer bulk service call so that all database changes are rolled back if anything fails.

This commit is contained in:
Jan 2026-01-23 16:57:06 +01:00
parent 605bcfe0fc
commit 417221eca8
3 changed files with 196 additions and 62 deletions

View file

@ -49,6 +49,7 @@ public class BatchGeoApiService {
ArrayList<BulkInstruction<ExcelNode>> noGeo = new ArrayList<>();
ArrayList<BulkInstruction<ExcelNode>> failedGeoLookups = new ArrayList<>();
ArrayList<BulkInstruction<ExcelNode>> failedFuzzyGeoLookups = new ArrayList<>();
int totalSuccessful = 0;
for (var node : nodes) {
@ -57,7 +58,6 @@ public class BatchGeoApiService {
}
}
for (int currentBatch = 0; currentBatch < noGeo.size(); currentBatch += MAX_BATCH_SIZE) {
int end = Math.min(currentBatch + MAX_BATCH_SIZE, noGeo.size());
var chunk = noGeo.subList(currentBatch, end);
@ -67,33 +67,108 @@ public class BatchGeoApiService {
.toList());
if (chunkResult.isPresent()) {
var response = chunkResult.get();
totalSuccessful += chunkResult.get().getSummary().getSuccessfulRequests();
if (response.getSummary() != null && response.getSummary().getSuccessfulRequests() != null) {
totalSuccessful += response.getSummary().getSuccessfulRequests();
}
if (response.getBatchItems() == null || response.getBatchItems().isEmpty()) {
logger.warn("Batch response contains no items");
failedGeoLookups.addAll(chunk);
continue;
}
for (int itemIdx = 0; itemIdx < chunk.size(); itemIdx++) {
var result = chunkResult.get().getBatchItems().get(itemIdx);
if (itemIdx >= response.getBatchItems().size()) {
logger.warn("BatchItems size mismatch at index {}", itemIdx);
failedGeoLookups.add(chunk.get(itemIdx));
continue;
}
var result = response.getBatchItems().get(itemIdx);
var node = chunk.get(itemIdx).getEntity();
if (!result.getFeatures().isEmpty() &&
(result.getFeatures().getFirst().getProperties().getConfidence().equalsIgnoreCase("high") ||
result.getFeatures().getFirst().getProperties().getConfidence().equalsIgnoreCase("medium") ||
(result.getFeatures().getFirst().getProperties().getMatchCodes() != null &&
result.getFeatures().getFirst().getProperties().getMatchCodes().stream().anyMatch(s -> s.equalsIgnoreCase("good"))))) {
var geometry = result.getFeatures().getFirst().getGeometry();
var properties = result.getFeatures().getFirst().getProperties();
node.setGeoLng(BigDecimal.valueOf(geometry.getCoordinates().get(0)));
node.setGeoLat(BigDecimal.valueOf(geometry.getCoordinates().get(1)));
node.setAddress(properties.getAddress().getFormattedAddress());
node.setCountryId(IsoCode.valueOf(properties.getAddress().getCountryRegion().getIso()));
} else {
logger.warn("Geocoding failed for address {}", node.getAddress());
if (result == null || result.getFeatures() == null || result.getFeatures().isEmpty()) {
logger.warn("No geocoding result for address {}",
node.getAddress() != null ? node.getAddress() : "unknown");
failedGeoLookups.add(chunk.get(itemIdx));
//throw new ExcelValidationError("Unable to geocode " + node.getName() + ". Please check your address or enter geo position yourself.");
continue;
}
var feature = result.getFeatures().getFirst();
if (feature == null) {
logger.warn("Feature is null for address {}", node.getAddress());
failedGeoLookups.add(chunk.get(itemIdx));
continue;
}
var properties = feature.getProperties();
if (properties == null) {
logger.warn("Properties is null for address {}", node.getAddress());
failedGeoLookups.add(chunk.get(itemIdx));
continue;
}
String confidence = properties.getConfidence();
boolean hasGoodConfidence = confidence != null &&
(confidence.equalsIgnoreCase("high") ||
confidence.equalsIgnoreCase("medium"));
boolean hasGoodMatchCode = properties.getMatchCodes() != null &&
properties.getMatchCodes().stream()
.anyMatch(s -> s != null && s.equalsIgnoreCase("good"));
if (hasGoodConfidence || hasGoodMatchCode) {
var geometry = feature.getGeometry();
if (geometry == null || geometry.getCoordinates() == null ||
geometry.getCoordinates().size() < 2) {
logger.warn("Invalid geometry for address {}", node.getAddress());
failedGeoLookups.add(chunk.get(itemIdx));
continue;
}
var coordinates = geometry.getCoordinates();
if (coordinates.get(0) == null || coordinates.get(1) == null) {
logger.warn("Null coordinates for address {}", node.getAddress());
failedGeoLookups.add(chunk.get(itemIdx));
continue;
}
node.setGeoLng(BigDecimal.valueOf(coordinates.get(0)));
node.setGeoLat(BigDecimal.valueOf(coordinates.get(1)));
if (properties.getAddress() != null &&
properties.getAddress().getFormattedAddress() != null) {
node.setAddress(properties.getAddress().getFormattedAddress());
}
if (properties.getAddress() != null &&
properties.getAddress().getCountryRegion() != null &&
properties.getAddress().getCountryRegion().getIso() != null) {
try {
node.setCountryId(IsoCode.valueOf(
properties.getAddress().getCountryRegion().getIso()));
} catch (IllegalArgumentException e) {
logger.warn("Invalid ISO code: {}",
properties.getAddress().getCountryRegion().getIso());
}
}
} else {
logger.warn("Geocoding failed for address {} (low confidence)",
node.getAddress());
failedGeoLookups.add(chunk.get(itemIdx));
}
}
} else {
logger.warn("Batch request returned empty result");
failedGeoLookups.addAll(chunk);
}
}
// Second pass: fuzzy lookup with company name for failed addresses
if (!failedGeoLookups.isEmpty()) {
@ -108,31 +183,52 @@ public class BatchGeoApiService {
&& !fuzzyResult.get().getResults().isEmpty()) {
var result = fuzzyResult.get().getResults().getFirst();
if (result == null) {
logger.warn("Fuzzy result is null for: {}", node.getName());
failedFuzzyGeoLookups.add(instruction);
continue;
}
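// Accept only results with a score of at least 7.0 (the threshold checked below).
// NOTE(review): the score is presumably a relative, unnormalized value rather than 0..1 — confirm the threshold.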
if (result.getScore() >= 7.0) {
node.setGeoLat(BigDecimal.valueOf(result.getPosition().getLat()));
node.setGeoLng(BigDecimal.valueOf(result.getPosition().getLon()));
double score = result.getScore();
if (score < 7.0) {
logger.warn("Fuzzy search returned low confidence result for: {} (score: {})",
node.getName(), score);
failedFuzzyGeoLookups.add(instruction);
continue;
}
if (result.getPosition() == null) {
logger.warn("Position is null for: {}", node.getName());
failedFuzzyGeoLookups.add(instruction);
continue;
}
double lat = result.getPosition().getLat();
double lon = result.getPosition().getLon();
node.setGeoLat(BigDecimal.valueOf(lat));
node.setGeoLng(BigDecimal.valueOf(lon));
if (result.getAddress() != null &&
result.getAddress().getFreeformAddress() != null) {
node.setAddress(result.getAddress().getFreeformAddress());
}
// Update country if it differs
if (result.getAddress().getCountryCode() != null) {
if (result.getAddress() != null &&
result.getAddress().getCountryCode() != null) {
try {
node.setCountryId(IsoCode.valueOf(result.getAddress().getCountryCode()));
} catch (IllegalArgumentException e) {
logger.warn("Unknown country code: {}", result.getAddress().getCountryCode());
logger.warn("Unknown country code: {}",
result.getAddress().getCountryCode());
failedFuzzyGeoLookups.add(instruction);
continue;
}
}
fuzzySuccessful++;
logger.info("Fuzzy search successful for: {} (score: {})",
node.getName(), result.getScore());
} else {
logger.warn("Fuzzy search returned low confidence result for: {} (score: {})",
node.getName(), result.getScore());
}
} else {
logger.error("Fuzzy search found no results for: {}", node.getName());
node.getName(), score);
}
}
@ -140,8 +236,10 @@ public class BatchGeoApiService {
fuzzySuccessful, failedGeoLookups.size());
// Throw error for remaining failed lookups
int remainingFailed = failedGeoLookups.size() - fuzzySuccessful;
if (remainingFailed > 0) {
if (!failedFuzzyGeoLookups.isEmpty()) {
failedFuzzyGeoLookups.forEach(instruction -> {logger.warn("Lookup finally failed for: {}", instruction.getEntity().getName());});
var firstFailed = failedGeoLookups.stream()
.filter(i -> i.getEntity().getGeoLat() == null)
.findFirst()
@ -149,7 +247,9 @@ public class BatchGeoApiService {
.orElse(null);
if (firstFailed != null) {
throw new ExcelValidationError("Unable to geocode " + firstFailed.getName()
String name = firstFailed.getName() != null ?
firstFailed.getName() : "unknown";
throw new ExcelValidationError("Unable to geocode " + name
+ ". Please check your address or enter geo position yourself.");
}
}
@ -159,13 +259,32 @@ public class BatchGeoApiService {
private Optional<FuzzySearchResponse> executeFuzzySearch(ExcelNode node) {
try {
String companyName = node.getName();
String country = node.getCountryId().name();
if (companyName == null) {
logger.warn("Company name is null for fuzzy search");
return Optional.empty();
}
IsoCode countryId = node.getCountryId();
if (countryId == null) {
logger.warn("Country ID is null for fuzzy search: {}", companyName);
return Optional.empty();
}
String country = countryId.name();
String address = node.getAddress();
if (address == null) {
logger.warn("Address is null for fuzzy search: {}", companyName);
address = ""; // Fallback zu leerem String
}
// Normalisiere Unicode für konsistente Suche
companyName = java.text.Normalizer.normalize(companyName, java.text.Normalizer.Form.NFC);
companyName = java.text.Normalizer.normalize(companyName,
java.text.Normalizer.Form.NFC);
// URL-Encoding
String encodedQuery = URLEncoder.encode(companyName + ", " + node.getAddress() + ", " + country, StandardCharsets.UTF_8);
String encodedQuery = URLEncoder.encode(
companyName + ", " + address + ", " + country,
StandardCharsets.UTF_8);
String url = String.format(
"https://atlas.microsoft.com/search/fuzzy/json?api-version=1.0&subscription-key=%s&query=%s&limit=5",
@ -185,13 +304,21 @@ public class BatchGeoApiService {
return Optional.ofNullable(response.getBody());
} catch (Exception e) {
logger.error("Fuzzy search failed for {}", node.getName(), e);
logger.error("Fuzzy search failed for {}",
node.getName() != null ? node.getName() : "unknown", e);
return Optional.empty();
}
}
/**
 * Builds the {@code "<address>, <country>"} lookup string for the given node.
 *
 * <p>A missing address or country is replaced by an empty string, so the
 * result never contains the literal text {@code "null"} and no
 * {@link NullPointerException} is raised for partially filled nodes.
 *
 * @param excelNode the node whose address and country are read
 * @return the address and the country's enum name, joined by {@code ", "}
 */
private String getGeoCodeString(ExcelNode excelNode) {
    String address = excelNode.getAddress();
    IsoCode countryId = excelNode.getCountryId();

    // Fallback values for null
    String addressStr = address != null ? address : "";
    String countryStr = countryId != null ? countryId.name() : "";

    return addressStr + ", " + countryStr;
}
private Optional<BatchGeocodingResponse> executeBatchRequest(List<BatchItem> batchItems) {

View file

@ -15,6 +15,7 @@ import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.io.ByteArrayInputStream;
import java.io.IOException;
@ -56,6 +57,7 @@ public class BulkImportService {
this.materialFastExcelMapper = materialFastExcelMapper;
}
@Transactional
public void processOperation(BulkOperation op) throws IOException {
var file = op.getFile();
var type = op.getFileType();

View file

@ -9,6 +9,7 @@ import de.avatic.lcc.service.transformer.generic.NodeTransformer;
import de.avatic.lcc.util.exception.internalerror.ExcelValidationError;
import org.springframework.stereotype.Service;
import java.math.BigDecimal;
import java.util.*;
@Service
@ -61,22 +62,26 @@ public class NodeBulkImportService {
}
private boolean compare(Node updateNode, Node currentNode) {
return updateNode.getName().equals(currentNode.getName()) &&
updateNode.getGeoLat().compareTo(currentNode.getGeoLat()) == 0 &&
updateNode.getGeoLng().compareTo(currentNode.getGeoLng()) == 0 &&
updateNode.getExternalMappingId().equals(currentNode.getExternalMappingId()) &&
updateNode.getCountryId().equals(currentNode.getCountryId()) &&
updateNode.getIntermediate().equals(currentNode.getIntermediate()) &&
updateNode.getDestination().equals(currentNode.getDestination()) &&
updateNode.getSource().equals(currentNode.getSource()) &&
updateNode.getAddress().equals(currentNode.getAddress()) &&
updateNode.getDeprecated().equals(currentNode.getDeprecated()) &&
updateNode.getId().equals(currentNode.getId()) &&
updateNode.getPredecessorRequired().equals(currentNode.getPredecessorRequired()) &&
return Objects.equals(updateNode.getName(), currentNode.getName()) &&
compareBigDecimal(updateNode.getGeoLat(), currentNode.getGeoLat()) &&
compareBigDecimal(updateNode.getGeoLng(), currentNode.getGeoLng()) &&
Objects.equals(updateNode.getExternalMappingId(), currentNode.getExternalMappingId()) &&
Objects.equals(updateNode.getCountryId(), currentNode.getCountryId()) &&
Objects.equals(updateNode.getIntermediate(), currentNode.getIntermediate()) &&
Objects.equals(updateNode.getDestination(), currentNode.getDestination()) &&
Objects.equals(updateNode.getSource(), currentNode.getSource()) &&
Objects.equals(updateNode.getAddress(), currentNode.getAddress()) &&
Objects.equals(updateNode.getDeprecated(), currentNode.getDeprecated()) &&
Objects.equals(updateNode.getId(), currentNode.getId()) &&
Objects.equals(updateNode.getPredecessorRequired(), currentNode.getPredecessorRequired()) &&
compare(updateNode.getNodePredecessors(), currentNode.getNodePredecessors()) &&
compare(updateNode.getOutboundCountries(), currentNode.getOutboundCountries());
}
/**
 * Null-safe numeric equality check for two {@link BigDecimal} values.
 *
 * <p>Two {@code null} arguments count as equal, a single {@code null} does
 * not. Non-null values are compared with {@code compareTo}, so values that
 * differ only in scale (for example {@code 1.0} and {@code 1.00}) match.
 *
 * @param a first value, may be {@code null}
 * @param b second value, may be {@code null}
 * @return {@code true} if both are {@code null} or numerically equal
 */
private boolean compareBigDecimal(BigDecimal a, BigDecimal b) {
    if (a == null || b == null) {
        // At least one side is missing: equal only when both are missing.
        return a == b;
    }
    return a.compareTo(b) == 0;
}
private boolean compare(Collection<Integer> outbound1, Collection<Integer> outbound2) {