Fix: Make bulk geocoding more stable. Added @Transactional to the outer bulk service call so that all database changes are rolled back if anything fails.
This commit is contained in:
parent
605bcfe0fc
commit
417221eca8
3 changed files with 196 additions and 62 deletions
|
|
@ -49,6 +49,7 @@ public class BatchGeoApiService {
|
||||||
|
|
||||||
ArrayList<BulkInstruction<ExcelNode>> noGeo = new ArrayList<>();
|
ArrayList<BulkInstruction<ExcelNode>> noGeo = new ArrayList<>();
|
||||||
ArrayList<BulkInstruction<ExcelNode>> failedGeoLookups = new ArrayList<>();
|
ArrayList<BulkInstruction<ExcelNode>> failedGeoLookups = new ArrayList<>();
|
||||||
|
ArrayList<BulkInstruction<ExcelNode>> failedFuzzyGeoLookups = new ArrayList<>();
|
||||||
int totalSuccessful = 0;
|
int totalSuccessful = 0;
|
||||||
|
|
||||||
for (var node : nodes) {
|
for (var node : nodes) {
|
||||||
|
|
@ -57,7 +58,6 @@ public class BatchGeoApiService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
for (int currentBatch = 0; currentBatch < noGeo.size(); currentBatch += MAX_BATCH_SIZE) {
|
for (int currentBatch = 0; currentBatch < noGeo.size(); currentBatch += MAX_BATCH_SIZE) {
|
||||||
int end = Math.min(currentBatch + MAX_BATCH_SIZE, noGeo.size());
|
int end = Math.min(currentBatch + MAX_BATCH_SIZE, noGeo.size());
|
||||||
var chunk = noGeo.subList(currentBatch, end);
|
var chunk = noGeo.subList(currentBatch, end);
|
||||||
|
|
@ -67,34 +67,109 @@ public class BatchGeoApiService {
|
||||||
.toList());
|
.toList());
|
||||||
|
|
||||||
if (chunkResult.isPresent()) {
|
if (chunkResult.isPresent()) {
|
||||||
|
var response = chunkResult.get();
|
||||||
|
|
||||||
totalSuccessful += chunkResult.get().getSummary().getSuccessfulRequests();
|
|
||||||
|
|
||||||
|
if (response.getSummary() != null && response.getSummary().getSuccessfulRequests() != null) {
|
||||||
|
totalSuccessful += response.getSummary().getSuccessfulRequests();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (response.getBatchItems() == null || response.getBatchItems().isEmpty()) {
|
||||||
|
logger.warn("Batch response contains no items");
|
||||||
|
failedGeoLookups.addAll(chunk);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
for (int itemIdx = 0; itemIdx < chunk.size(); itemIdx++) {
|
for (int itemIdx = 0; itemIdx < chunk.size(); itemIdx++) {
|
||||||
var result = chunkResult.get().getBatchItems().get(itemIdx);
|
|
||||||
|
if (itemIdx >= response.getBatchItems().size()) {
|
||||||
|
logger.warn("BatchItems size mismatch at index {}", itemIdx);
|
||||||
|
failedGeoLookups.add(chunk.get(itemIdx));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
var result = response.getBatchItems().get(itemIdx);
|
||||||
var node = chunk.get(itemIdx).getEntity();
|
var node = chunk.get(itemIdx).getEntity();
|
||||||
|
|
||||||
if (!result.getFeatures().isEmpty() &&
|
|
||||||
(result.getFeatures().getFirst().getProperties().getConfidence().equalsIgnoreCase("high") ||
|
if (result == null || result.getFeatures() == null || result.getFeatures().isEmpty()) {
|
||||||
result.getFeatures().getFirst().getProperties().getConfidence().equalsIgnoreCase("medium") ||
|
logger.warn("No geocoding result for address {}",
|
||||||
(result.getFeatures().getFirst().getProperties().getMatchCodes() != null &&
|
node.getAddress() != null ? node.getAddress() : "unknown");
|
||||||
result.getFeatures().getFirst().getProperties().getMatchCodes().stream().anyMatch(s -> s.equalsIgnoreCase("good"))))) {
|
failedGeoLookups.add(chunk.get(itemIdx));
|
||||||
var geometry = result.getFeatures().getFirst().getGeometry();
|
continue;
|
||||||
var properties = result.getFeatures().getFirst().getProperties();
|
}
|
||||||
node.setGeoLng(BigDecimal.valueOf(geometry.getCoordinates().get(0)));
|
|
||||||
node.setGeoLat(BigDecimal.valueOf(geometry.getCoordinates().get(1)));
|
var feature = result.getFeatures().getFirst();
|
||||||
node.setAddress(properties.getAddress().getFormattedAddress());
|
if (feature == null) {
|
||||||
node.setCountryId(IsoCode.valueOf(properties.getAddress().getCountryRegion().getIso()));
|
logger.warn("Feature is null for address {}", node.getAddress());
|
||||||
} else {
|
failedGeoLookups.add(chunk.get(itemIdx));
|
||||||
logger.warn("Geocoding failed for address {}", node.getAddress());
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
var properties = feature.getProperties();
|
||||||
|
if (properties == null) {
|
||||||
|
logger.warn("Properties is null for address {}", node.getAddress());
|
||||||
|
failedGeoLookups.add(chunk.get(itemIdx));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
String confidence = properties.getConfidence();
|
||||||
|
boolean hasGoodConfidence = confidence != null &&
|
||||||
|
(confidence.equalsIgnoreCase("high") ||
|
||||||
|
confidence.equalsIgnoreCase("medium"));
|
||||||
|
|
||||||
|
boolean hasGoodMatchCode = properties.getMatchCodes() != null &&
|
||||||
|
properties.getMatchCodes().stream()
|
||||||
|
.anyMatch(s -> s != null && s.equalsIgnoreCase("good"));
|
||||||
|
|
||||||
|
if (hasGoodConfidence || hasGoodMatchCode) {
|
||||||
|
var geometry = feature.getGeometry();
|
||||||
|
if (geometry == null || geometry.getCoordinates() == null ||
|
||||||
|
geometry.getCoordinates().size() < 2) {
|
||||||
|
logger.warn("Invalid geometry for address {}", node.getAddress());
|
||||||
|
failedGeoLookups.add(chunk.get(itemIdx));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
var coordinates = geometry.getCoordinates();
|
||||||
|
if (coordinates.get(0) == null || coordinates.get(1) == null) {
|
||||||
|
logger.warn("Null coordinates for address {}", node.getAddress());
|
||||||
|
failedGeoLookups.add(chunk.get(itemIdx));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
node.setGeoLng(BigDecimal.valueOf(coordinates.get(0)));
|
||||||
|
node.setGeoLat(BigDecimal.valueOf(coordinates.get(1)));
|
||||||
|
|
||||||
|
if (properties.getAddress() != null &&
|
||||||
|
properties.getAddress().getFormattedAddress() != null) {
|
||||||
|
node.setAddress(properties.getAddress().getFormattedAddress());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (properties.getAddress() != null &&
|
||||||
|
properties.getAddress().getCountryRegion() != null &&
|
||||||
|
properties.getAddress().getCountryRegion().getIso() != null) {
|
||||||
|
try {
|
||||||
|
node.setCountryId(IsoCode.valueOf(
|
||||||
|
properties.getAddress().getCountryRegion().getIso()));
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
logger.warn("Invalid ISO code: {}",
|
||||||
|
properties.getAddress().getCountryRegion().getIso());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
logger.warn("Geocoding failed for address {} (low confidence)",
|
||||||
|
node.getAddress());
|
||||||
failedGeoLookups.add(chunk.get(itemIdx));
|
failedGeoLookups.add(chunk.get(itemIdx));
|
||||||
//throw new ExcelValidationError("Unable to geocode " + node.getName() + ". Please check your address or enter geo position yourself.");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
logger.warn("Batch request returned empty result");
|
||||||
|
failedGeoLookups.addAll(chunk);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Second pass: fuzzy lookup with company name for failed addresses
|
// Second pass: fuzzy lookup with company name for failed addresses
|
||||||
if (!failedGeoLookups.isEmpty()) {
|
if (!failedGeoLookups.isEmpty()) {
|
||||||
logger.info("Retrying {} failed lookups with fuzzy search", failedGeoLookups.size());
|
logger.info("Retrying {} failed lookups with fuzzy search", failedGeoLookups.size());
|
||||||
|
|
@ -108,31 +183,52 @@ public class BatchGeoApiService {
|
||||||
&& !fuzzyResult.get().getResults().isEmpty()) {
|
&& !fuzzyResult.get().getResults().isEmpty()) {
|
||||||
|
|
||||||
var result = fuzzyResult.get().getResults().getFirst();
|
var result = fuzzyResult.get().getResults().getFirst();
|
||||||
|
if (result == null) {
|
||||||
// Score >= 0.7 means good confidence (1.0 = perfect match)
|
logger.warn("Fuzzy result is null for: {}", node.getName());
|
||||||
if (result.getScore() >= 7.0) {
|
failedFuzzyGeoLookups.add(instruction);
|
||||||
node.setGeoLat(BigDecimal.valueOf(result.getPosition().getLat()));
|
continue;
|
||||||
node.setGeoLng(BigDecimal.valueOf(result.getPosition().getLon()));
|
|
||||||
node.setAddress(result.getAddress().getFreeformAddress());
|
|
||||||
|
|
||||||
// Update country if it differs
|
|
||||||
if (result.getAddress().getCountryCode() != null) {
|
|
||||||
try {
|
|
||||||
node.setCountryId(IsoCode.valueOf(result.getAddress().getCountryCode()));
|
|
||||||
} catch (IllegalArgumentException e) {
|
|
||||||
logger.warn("Unknown country code: {}", result.getAddress().getCountryCode());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fuzzySuccessful++;
|
|
||||||
logger.info("Fuzzy search successful for: {} (score: {})",
|
|
||||||
node.getName(), result.getScore());
|
|
||||||
} else {
|
|
||||||
logger.warn("Fuzzy search returned low confidence result for: {} (score: {})",
|
|
||||||
node.getName(), result.getScore());
|
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
logger.error("Fuzzy search found no results for: {}", node.getName());
|
double score = result.getScore();
|
||||||
|
if (score < 7.0) {
|
||||||
|
logger.warn("Fuzzy search returned low confidence result for: {} (score: {})",
|
||||||
|
node.getName(), score);
|
||||||
|
failedFuzzyGeoLookups.add(instruction);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result.getPosition() == null) {
|
||||||
|
logger.warn("Position is null for: {}", node.getName());
|
||||||
|
failedFuzzyGeoLookups.add(instruction);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
double lat = result.getPosition().getLat();
|
||||||
|
double lon = result.getPosition().getLon();
|
||||||
|
|
||||||
|
node.setGeoLat(BigDecimal.valueOf(lat));
|
||||||
|
node.setGeoLng(BigDecimal.valueOf(lon));
|
||||||
|
|
||||||
|
if (result.getAddress() != null &&
|
||||||
|
result.getAddress().getFreeformAddress() != null) {
|
||||||
|
node.setAddress(result.getAddress().getFreeformAddress());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result.getAddress() != null &&
|
||||||
|
result.getAddress().getCountryCode() != null) {
|
||||||
|
try {
|
||||||
|
node.setCountryId(IsoCode.valueOf(result.getAddress().getCountryCode()));
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
logger.warn("Unknown country code: {}",
|
||||||
|
result.getAddress().getCountryCode());
|
||||||
|
failedFuzzyGeoLookups.add(instruction);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fuzzySuccessful++;
|
||||||
|
logger.info("Fuzzy search successful for: {} (score: {})",
|
||||||
|
node.getName(), score);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -140,8 +236,10 @@ public class BatchGeoApiService {
|
||||||
fuzzySuccessful, failedGeoLookups.size());
|
fuzzySuccessful, failedGeoLookups.size());
|
||||||
|
|
||||||
// Throw error for remaining failed lookups
|
// Throw error for remaining failed lookups
|
||||||
int remainingFailed = failedGeoLookups.size() - fuzzySuccessful;
|
if (!failedFuzzyGeoLookups.isEmpty()) {
|
||||||
if (remainingFailed > 0) {
|
|
||||||
|
failedFuzzyGeoLookups.forEach(instruction -> {logger.warn("Lookup finally failed for: {}", instruction.getEntity().getName());});
|
||||||
|
|
||||||
var firstFailed = failedGeoLookups.stream()
|
var firstFailed = failedGeoLookups.stream()
|
||||||
.filter(i -> i.getEntity().getGeoLat() == null)
|
.filter(i -> i.getEntity().getGeoLat() == null)
|
||||||
.findFirst()
|
.findFirst()
|
||||||
|
|
@ -149,7 +247,9 @@ public class BatchGeoApiService {
|
||||||
.orElse(null);
|
.orElse(null);
|
||||||
|
|
||||||
if (firstFailed != null) {
|
if (firstFailed != null) {
|
||||||
throw new ExcelValidationError("Unable to geocode " + firstFailed.getName()
|
String name = firstFailed.getName() != null ?
|
||||||
|
firstFailed.getName() : "unknown";
|
||||||
|
throw new ExcelValidationError("Unable to geocode " + name
|
||||||
+ ". Please check your address or enter geo position yourself.");
|
+ ". Please check your address or enter geo position yourself.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -159,13 +259,32 @@ public class BatchGeoApiService {
|
||||||
private Optional<FuzzySearchResponse> executeFuzzySearch(ExcelNode node) {
|
private Optional<FuzzySearchResponse> executeFuzzySearch(ExcelNode node) {
|
||||||
try {
|
try {
|
||||||
String companyName = node.getName();
|
String companyName = node.getName();
|
||||||
String country = node.getCountryId().name();
|
if (companyName == null) {
|
||||||
|
logger.warn("Company name is null for fuzzy search");
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
IsoCode countryId = node.getCountryId();
|
||||||
|
if (countryId == null) {
|
||||||
|
logger.warn("Country ID is null for fuzzy search: {}", companyName);
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
String country = countryId.name();
|
||||||
|
|
||||||
|
String address = node.getAddress();
|
||||||
|
if (address == null) {
|
||||||
|
logger.warn("Address is null for fuzzy search: {}", companyName);
|
||||||
|
address = ""; // Fallback zu leerem String
|
||||||
|
}
|
||||||
|
|
||||||
// Normalisiere Unicode für konsistente Suche
|
// Normalisiere Unicode für konsistente Suche
|
||||||
companyName = java.text.Normalizer.normalize(companyName, java.text.Normalizer.Form.NFC);
|
companyName = java.text.Normalizer.normalize(companyName,
|
||||||
|
java.text.Normalizer.Form.NFC);
|
||||||
|
|
||||||
// URL-Encoding
|
// URL-Encoding
|
||||||
String encodedQuery = URLEncoder.encode(companyName + ", " + node.getAddress() + ", " + country, StandardCharsets.UTF_8);
|
String encodedQuery = URLEncoder.encode(
|
||||||
|
companyName + ", " + address + ", " + country,
|
||||||
|
StandardCharsets.UTF_8);
|
||||||
|
|
||||||
String url = String.format(
|
String url = String.format(
|
||||||
"https://atlas.microsoft.com/search/fuzzy/json?api-version=1.0&subscription-key=%s&query=%s&limit=5",
|
"https://atlas.microsoft.com/search/fuzzy/json?api-version=1.0&subscription-key=%s&query=%s&limit=5",
|
||||||
|
|
@ -185,13 +304,21 @@ public class BatchGeoApiService {
|
||||||
return Optional.ofNullable(response.getBody());
|
return Optional.ofNullable(response.getBody());
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.error("Fuzzy search failed for {}", node.getName(), e);
|
logger.error("Fuzzy search failed for {}",
|
||||||
|
node.getName() != null ? node.getName() : "unknown", e);
|
||||||
return Optional.empty();
|
return Optional.empty();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getGeoCodeString(ExcelNode excelNode) {
|
private String getGeoCodeString(ExcelNode excelNode) {
|
||||||
return excelNode.getAddress() + ", " + excelNode.getCountryId();
|
String address = excelNode.getAddress();
|
||||||
|
IsoCode countryId = excelNode.getCountryId();
|
||||||
|
|
||||||
|
// Fallback-Werte für null
|
||||||
|
String addressStr = address != null ? address : "";
|
||||||
|
String countryStr = countryId != null ? countryId.name() : "";
|
||||||
|
|
||||||
|
return addressStr + ", " + countryStr;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Optional<BatchGeocodingResponse> executeBatchRequest(List<BatchItem> batchItems) {
|
private Optional<BatchGeocodingResponse> executeBatchRequest(List<BatchItem> batchItems) {
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
@ -56,6 +57,7 @@ public class BulkImportService {
|
||||||
this.materialFastExcelMapper = materialFastExcelMapper;
|
this.materialFastExcelMapper = materialFastExcelMapper;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Transactional
|
||||||
public void processOperation(BulkOperation op) throws IOException {
|
public void processOperation(BulkOperation op) throws IOException {
|
||||||
var file = op.getFile();
|
var file = op.getFile();
|
||||||
var type = op.getFileType();
|
var type = op.getFileType();
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ import de.avatic.lcc.service.transformer.generic.NodeTransformer;
|
||||||
import de.avatic.lcc.util.exception.internalerror.ExcelValidationError;
|
import de.avatic.lcc.util.exception.internalerror.ExcelValidationError;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import java.math.BigDecimal;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
|
|
@ -61,22 +62,26 @@ public class NodeBulkImportService {
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean compare(Node updateNode, Node currentNode) {
|
private boolean compare(Node updateNode, Node currentNode) {
|
||||||
|
return Objects.equals(updateNode.getName(), currentNode.getName()) &&
|
||||||
return updateNode.getName().equals(currentNode.getName()) &&
|
compareBigDecimal(updateNode.getGeoLat(), currentNode.getGeoLat()) &&
|
||||||
updateNode.getGeoLat().compareTo(currentNode.getGeoLat()) == 0 &&
|
compareBigDecimal(updateNode.getGeoLng(), currentNode.getGeoLng()) &&
|
||||||
updateNode.getGeoLng().compareTo(currentNode.getGeoLng()) == 0 &&
|
Objects.equals(updateNode.getExternalMappingId(), currentNode.getExternalMappingId()) &&
|
||||||
updateNode.getExternalMappingId().equals(currentNode.getExternalMappingId()) &&
|
Objects.equals(updateNode.getCountryId(), currentNode.getCountryId()) &&
|
||||||
updateNode.getCountryId().equals(currentNode.getCountryId()) &&
|
Objects.equals(updateNode.getIntermediate(), currentNode.getIntermediate()) &&
|
||||||
updateNode.getIntermediate().equals(currentNode.getIntermediate()) &&
|
Objects.equals(updateNode.getDestination(), currentNode.getDestination()) &&
|
||||||
updateNode.getDestination().equals(currentNode.getDestination()) &&
|
Objects.equals(updateNode.getSource(), currentNode.getSource()) &&
|
||||||
updateNode.getSource().equals(currentNode.getSource()) &&
|
Objects.equals(updateNode.getAddress(), currentNode.getAddress()) &&
|
||||||
updateNode.getAddress().equals(currentNode.getAddress()) &&
|
Objects.equals(updateNode.getDeprecated(), currentNode.getDeprecated()) &&
|
||||||
updateNode.getDeprecated().equals(currentNode.getDeprecated()) &&
|
Objects.equals(updateNode.getId(), currentNode.getId()) &&
|
||||||
updateNode.getId().equals(currentNode.getId()) &&
|
Objects.equals(updateNode.getPredecessorRequired(), currentNode.getPredecessorRequired()) &&
|
||||||
updateNode.getPredecessorRequired().equals(currentNode.getPredecessorRequired()) &&
|
|
||||||
compare(updateNode.getNodePredecessors(), currentNode.getNodePredecessors()) &&
|
compare(updateNode.getNodePredecessors(), currentNode.getNodePredecessors()) &&
|
||||||
compare(updateNode.getOutboundCountries(), currentNode.getOutboundCountries());
|
compare(updateNode.getOutboundCountries(), currentNode.getOutboundCountries());
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean compareBigDecimal(BigDecimal a, BigDecimal b) {
|
||||||
|
if (a == null && b == null) return true;
|
||||||
|
if (a == null || b == null) return false;
|
||||||
|
return a.compareTo(b) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean compare(Collection<Integer> outbound1, Collection<Integer> outbound2) {
|
private boolean compare(Collection<Integer> outbound1, Collection<Integer> outbound2) {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue