Fix: More stable bulk geocoding. Added @Transactional to the outer bulk service call so that all database changes are rolled back if anything fails
This commit is contained in:
parent
605bcfe0fc
commit
417221eca8
3 changed files with 196 additions and 62 deletions
|
|
@ -49,6 +49,7 @@ public class BatchGeoApiService {
|
|||
|
||||
ArrayList<BulkInstruction<ExcelNode>> noGeo = new ArrayList<>();
|
||||
ArrayList<BulkInstruction<ExcelNode>> failedGeoLookups = new ArrayList<>();
|
||||
ArrayList<BulkInstruction<ExcelNode>> failedFuzzyGeoLookups = new ArrayList<>();
|
||||
int totalSuccessful = 0;
|
||||
|
||||
for (var node : nodes) {
|
||||
|
|
@ -57,7 +58,6 @@ public class BatchGeoApiService {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
for (int currentBatch = 0; currentBatch < noGeo.size(); currentBatch += MAX_BATCH_SIZE) {
|
||||
int end = Math.min(currentBatch + MAX_BATCH_SIZE, noGeo.size());
|
||||
var chunk = noGeo.subList(currentBatch, end);
|
||||
|
|
@ -67,33 +67,108 @@ public class BatchGeoApiService {
|
|||
.toList());
|
||||
|
||||
if (chunkResult.isPresent()) {
|
||||
var response = chunkResult.get();
|
||||
|
||||
totalSuccessful += chunkResult.get().getSummary().getSuccessfulRequests();
|
||||
|
||||
if (response.getSummary() != null && response.getSummary().getSuccessfulRequests() != null) {
|
||||
totalSuccessful += response.getSummary().getSuccessfulRequests();
|
||||
}
|
||||
|
||||
if (response.getBatchItems() == null || response.getBatchItems().isEmpty()) {
|
||||
logger.warn("Batch response contains no items");
|
||||
failedGeoLookups.addAll(chunk);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int itemIdx = 0; itemIdx < chunk.size(); itemIdx++) {
|
||||
var result = chunkResult.get().getBatchItems().get(itemIdx);
|
||||
|
||||
if (itemIdx >= response.getBatchItems().size()) {
|
||||
logger.warn("BatchItems size mismatch at index {}", itemIdx);
|
||||
failedGeoLookups.add(chunk.get(itemIdx));
|
||||
continue;
|
||||
}
|
||||
|
||||
var result = response.getBatchItems().get(itemIdx);
|
||||
var node = chunk.get(itemIdx).getEntity();
|
||||
|
||||
if (!result.getFeatures().isEmpty() &&
|
||||
(result.getFeatures().getFirst().getProperties().getConfidence().equalsIgnoreCase("high") ||
|
||||
result.getFeatures().getFirst().getProperties().getConfidence().equalsIgnoreCase("medium") ||
|
||||
(result.getFeatures().getFirst().getProperties().getMatchCodes() != null &&
|
||||
result.getFeatures().getFirst().getProperties().getMatchCodes().stream().anyMatch(s -> s.equalsIgnoreCase("good"))))) {
|
||||
var geometry = result.getFeatures().getFirst().getGeometry();
|
||||
var properties = result.getFeatures().getFirst().getProperties();
|
||||
node.setGeoLng(BigDecimal.valueOf(geometry.getCoordinates().get(0)));
|
||||
node.setGeoLat(BigDecimal.valueOf(geometry.getCoordinates().get(1)));
|
||||
node.setAddress(properties.getAddress().getFormattedAddress());
|
||||
node.setCountryId(IsoCode.valueOf(properties.getAddress().getCountryRegion().getIso()));
|
||||
} else {
|
||||
logger.warn("Geocoding failed for address {}", node.getAddress());
|
||||
|
||||
if (result == null || result.getFeatures() == null || result.getFeatures().isEmpty()) {
|
||||
logger.warn("No geocoding result for address {}",
|
||||
node.getAddress() != null ? node.getAddress() : "unknown");
|
||||
failedGeoLookups.add(chunk.get(itemIdx));
|
||||
//throw new ExcelValidationError("Unable to geocode " + node.getName() + ". Please check your address or enter geo position yourself.");
|
||||
continue;
|
||||
}
|
||||
|
||||
var feature = result.getFeatures().getFirst();
|
||||
if (feature == null) {
|
||||
logger.warn("Feature is null for address {}", node.getAddress());
|
||||
failedGeoLookups.add(chunk.get(itemIdx));
|
||||
continue;
|
||||
}
|
||||
|
||||
var properties = feature.getProperties();
|
||||
if (properties == null) {
|
||||
logger.warn("Properties is null for address {}", node.getAddress());
|
||||
failedGeoLookups.add(chunk.get(itemIdx));
|
||||
continue;
|
||||
}
|
||||
|
||||
String confidence = properties.getConfidence();
|
||||
boolean hasGoodConfidence = confidence != null &&
|
||||
(confidence.equalsIgnoreCase("high") ||
|
||||
confidence.equalsIgnoreCase("medium"));
|
||||
|
||||
boolean hasGoodMatchCode = properties.getMatchCodes() != null &&
|
||||
properties.getMatchCodes().stream()
|
||||
.anyMatch(s -> s != null && s.equalsIgnoreCase("good"));
|
||||
|
||||
if (hasGoodConfidence || hasGoodMatchCode) {
|
||||
var geometry = feature.getGeometry();
|
||||
if (geometry == null || geometry.getCoordinates() == null ||
|
||||
geometry.getCoordinates().size() < 2) {
|
||||
logger.warn("Invalid geometry for address {}", node.getAddress());
|
||||
failedGeoLookups.add(chunk.get(itemIdx));
|
||||
continue;
|
||||
}
|
||||
|
||||
var coordinates = geometry.getCoordinates();
|
||||
if (coordinates.get(0) == null || coordinates.get(1) == null) {
|
||||
logger.warn("Null coordinates for address {}", node.getAddress());
|
||||
failedGeoLookups.add(chunk.get(itemIdx));
|
||||
continue;
|
||||
}
|
||||
|
||||
node.setGeoLng(BigDecimal.valueOf(coordinates.get(0)));
|
||||
node.setGeoLat(BigDecimal.valueOf(coordinates.get(1)));
|
||||
|
||||
if (properties.getAddress() != null &&
|
||||
properties.getAddress().getFormattedAddress() != null) {
|
||||
node.setAddress(properties.getAddress().getFormattedAddress());
|
||||
}
|
||||
|
||||
if (properties.getAddress() != null &&
|
||||
properties.getAddress().getCountryRegion() != null &&
|
||||
properties.getAddress().getCountryRegion().getIso() != null) {
|
||||
try {
|
||||
node.setCountryId(IsoCode.valueOf(
|
||||
properties.getAddress().getCountryRegion().getIso()));
|
||||
} catch (IllegalArgumentException e) {
|
||||
logger.warn("Invalid ISO code: {}",
|
||||
properties.getAddress().getCountryRegion().getIso());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
logger.warn("Geocoding failed for address {} (low confidence)",
|
||||
node.getAddress());
|
||||
failedGeoLookups.add(chunk.get(itemIdx));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
logger.warn("Batch request returned empty result");
|
||||
failedGeoLookups.addAll(chunk);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Second pass: fuzzy lookup with company name for failed addresses
|
||||
if (!failedGeoLookups.isEmpty()) {
|
||||
|
|
@ -108,31 +183,52 @@ public class BatchGeoApiService {
|
|||
&& !fuzzyResult.get().getResults().isEmpty()) {
|
||||
|
||||
var result = fuzzyResult.get().getResults().getFirst();
|
||||
if (result == null) {
|
||||
logger.warn("Fuzzy result is null for: {}", node.getName());
|
||||
failedFuzzyGeoLookups.add(instruction);
|
||||
continue;
|
||||
}
|
||||
|
||||
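// Only accept fuzzy results scoring at least 7.0. NOTE(review): a 0.7 threshold on a 0-1 scale was documented here before, which contradicts the 7.0 check below - confirm the intended cutoff.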
|
||||
if (result.getScore() >= 7.0) {
|
||||
node.setGeoLat(BigDecimal.valueOf(result.getPosition().getLat()));
|
||||
node.setGeoLng(BigDecimal.valueOf(result.getPosition().getLon()));
|
||||
double score = result.getScore();
|
||||
if (score < 7.0) {
|
||||
logger.warn("Fuzzy search returned low confidence result for: {} (score: {})",
|
||||
node.getName(), score);
|
||||
failedFuzzyGeoLookups.add(instruction);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (result.getPosition() == null) {
|
||||
logger.warn("Position is null for: {}", node.getName());
|
||||
failedFuzzyGeoLookups.add(instruction);
|
||||
continue;
|
||||
}
|
||||
|
||||
double lat = result.getPosition().getLat();
|
||||
double lon = result.getPosition().getLon();
|
||||
|
||||
node.setGeoLat(BigDecimal.valueOf(lat));
|
||||
node.setGeoLng(BigDecimal.valueOf(lon));
|
||||
|
||||
if (result.getAddress() != null &&
|
||||
result.getAddress().getFreeformAddress() != null) {
|
||||
node.setAddress(result.getAddress().getFreeformAddress());
|
||||
}
|
||||
|
||||
// Update country if it differs
|
||||
if (result.getAddress().getCountryCode() != null) {
|
||||
if (result.getAddress() != null &&
|
||||
result.getAddress().getCountryCode() != null) {
|
||||
try {
|
||||
node.setCountryId(IsoCode.valueOf(result.getAddress().getCountryCode()));
|
||||
} catch (IllegalArgumentException e) {
|
||||
logger.warn("Unknown country code: {}", result.getAddress().getCountryCode());
|
||||
logger.warn("Unknown country code: {}",
|
||||
result.getAddress().getCountryCode());
|
||||
failedFuzzyGeoLookups.add(instruction);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
fuzzySuccessful++;
|
||||
logger.info("Fuzzy search successful for: {} (score: {})",
|
||||
node.getName(), result.getScore());
|
||||
} else {
|
||||
logger.warn("Fuzzy search returned low confidence result for: {} (score: {})",
|
||||
node.getName(), result.getScore());
|
||||
}
|
||||
} else {
|
||||
logger.error("Fuzzy search found no results for: {}", node.getName());
|
||||
node.getName(), score);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -140,8 +236,10 @@ public class BatchGeoApiService {
|
|||
fuzzySuccessful, failedGeoLookups.size());
|
||||
|
||||
// Throw error for remaining failed lookups
|
||||
int remainingFailed = failedGeoLookups.size() - fuzzySuccessful;
|
||||
if (remainingFailed > 0) {
|
||||
if (!failedFuzzyGeoLookups.isEmpty()) {
|
||||
|
||||
failedFuzzyGeoLookups.forEach(instruction -> {logger.warn("Lookup finally failed for: {}", instruction.getEntity().getName());});
|
||||
|
||||
var firstFailed = failedGeoLookups.stream()
|
||||
.filter(i -> i.getEntity().getGeoLat() == null)
|
||||
.findFirst()
|
||||
|
|
@ -149,7 +247,9 @@ public class BatchGeoApiService {
|
|||
.orElse(null);
|
||||
|
||||
if (firstFailed != null) {
|
||||
throw new ExcelValidationError("Unable to geocode " + firstFailed.getName()
|
||||
String name = firstFailed.getName() != null ?
|
||||
firstFailed.getName() : "unknown";
|
||||
throw new ExcelValidationError("Unable to geocode " + name
|
||||
+ ". Please check your address or enter geo position yourself.");
|
||||
}
|
||||
}
|
||||
|
|
@ -159,13 +259,32 @@ public class BatchGeoApiService {
|
|||
private Optional<FuzzySearchResponse> executeFuzzySearch(ExcelNode node) {
|
||||
try {
|
||||
String companyName = node.getName();
|
||||
String country = node.getCountryId().name();
|
||||
if (companyName == null) {
|
||||
logger.warn("Company name is null for fuzzy search");
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
IsoCode countryId = node.getCountryId();
|
||||
if (countryId == null) {
|
||||
logger.warn("Country ID is null for fuzzy search: {}", companyName);
|
||||
return Optional.empty();
|
||||
}
|
||||
String country = countryId.name();
|
||||
|
||||
String address = node.getAddress();
|
||||
if (address == null) {
|
||||
logger.warn("Address is null for fuzzy search: {}", companyName);
|
||||
address = ""; // Fallback zu leerem String
|
||||
}
|
||||
|
||||
// Normalisiere Unicode für konsistente Suche
|
||||
companyName = java.text.Normalizer.normalize(companyName, java.text.Normalizer.Form.NFC);
|
||||
companyName = java.text.Normalizer.normalize(companyName,
|
||||
java.text.Normalizer.Form.NFC);
|
||||
|
||||
// URL-Encoding
|
||||
String encodedQuery = URLEncoder.encode(companyName + ", " + node.getAddress() + ", " + country, StandardCharsets.UTF_8);
|
||||
String encodedQuery = URLEncoder.encode(
|
||||
companyName + ", " + address + ", " + country,
|
||||
StandardCharsets.UTF_8);
|
||||
|
||||
String url = String.format(
|
||||
"https://atlas.microsoft.com/search/fuzzy/json?api-version=1.0&subscription-key=%s&query=%s&limit=5",
|
||||
|
|
@ -185,13 +304,21 @@ public class BatchGeoApiService {
|
|||
return Optional.ofNullable(response.getBody());
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("Fuzzy search failed for {}", node.getName(), e);
|
||||
logger.error("Fuzzy search failed for {}",
|
||||
node.getName() != null ? node.getName() : "unknown", e);
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
|
||||
private String getGeoCodeString(ExcelNode excelNode) {
|
||||
return excelNode.getAddress() + ", " + excelNode.getCountryId();
|
||||
String address = excelNode.getAddress();
|
||||
IsoCode countryId = excelNode.getCountryId();
|
||||
|
||||
// Fallback-Werte für null
|
||||
String addressStr = address != null ? address : "";
|
||||
String countryStr = countryId != null ? countryId.name() : "";
|
||||
|
||||
return addressStr + ", " + countryStr;
|
||||
}
|
||||
|
||||
private Optional<BatchGeocodingResponse> executeBatchRequest(List<BatchItem> batchItems) {
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
|
|
@ -56,6 +57,7 @@ public class BulkImportService {
|
|||
this.materialFastExcelMapper = materialFastExcelMapper;
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public void processOperation(BulkOperation op) throws IOException {
|
||||
var file = op.getFile();
|
||||
var type = op.getFileType();
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import de.avatic.lcc.service.transformer.generic.NodeTransformer;
|
|||
import de.avatic.lcc.util.exception.internalerror.ExcelValidationError;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.*;
|
||||
|
||||
@Service
|
||||
|
|
@ -61,22 +62,26 @@ public class NodeBulkImportService {
|
|||
}
|
||||
|
||||
private boolean compare(Node updateNode, Node currentNode) {
|
||||
|
||||
return updateNode.getName().equals(currentNode.getName()) &&
|
||||
updateNode.getGeoLat().compareTo(currentNode.getGeoLat()) == 0 &&
|
||||
updateNode.getGeoLng().compareTo(currentNode.getGeoLng()) == 0 &&
|
||||
updateNode.getExternalMappingId().equals(currentNode.getExternalMappingId()) &&
|
||||
updateNode.getCountryId().equals(currentNode.getCountryId()) &&
|
||||
updateNode.getIntermediate().equals(currentNode.getIntermediate()) &&
|
||||
updateNode.getDestination().equals(currentNode.getDestination()) &&
|
||||
updateNode.getSource().equals(currentNode.getSource()) &&
|
||||
updateNode.getAddress().equals(currentNode.getAddress()) &&
|
||||
updateNode.getDeprecated().equals(currentNode.getDeprecated()) &&
|
||||
updateNode.getId().equals(currentNode.getId()) &&
|
||||
updateNode.getPredecessorRequired().equals(currentNode.getPredecessorRequired()) &&
|
||||
return Objects.equals(updateNode.getName(), currentNode.getName()) &&
|
||||
compareBigDecimal(updateNode.getGeoLat(), currentNode.getGeoLat()) &&
|
||||
compareBigDecimal(updateNode.getGeoLng(), currentNode.getGeoLng()) &&
|
||||
Objects.equals(updateNode.getExternalMappingId(), currentNode.getExternalMappingId()) &&
|
||||
Objects.equals(updateNode.getCountryId(), currentNode.getCountryId()) &&
|
||||
Objects.equals(updateNode.getIntermediate(), currentNode.getIntermediate()) &&
|
||||
Objects.equals(updateNode.getDestination(), currentNode.getDestination()) &&
|
||||
Objects.equals(updateNode.getSource(), currentNode.getSource()) &&
|
||||
Objects.equals(updateNode.getAddress(), currentNode.getAddress()) &&
|
||||
Objects.equals(updateNode.getDeprecated(), currentNode.getDeprecated()) &&
|
||||
Objects.equals(updateNode.getId(), currentNode.getId()) &&
|
||||
Objects.equals(updateNode.getPredecessorRequired(), currentNode.getPredecessorRequired()) &&
|
||||
compare(updateNode.getNodePredecessors(), currentNode.getNodePredecessors()) &&
|
||||
compare(updateNode.getOutboundCountries(), currentNode.getOutboundCountries());
|
||||
}
|
||||
|
||||
private boolean compareBigDecimal(BigDecimal a, BigDecimal b) {
|
||||
if (a == null && b == null) return true;
|
||||
if (a == null || b == null) return false;
|
||||
return a.compareTo(b) == 0;
|
||||
}
|
||||
|
||||
private boolean compare(Collection<Integer> outbound1, Collection<Integer> outbound2) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue