diff --git a/src/main/java/de/avatic/lcc/service/api/BatchGeoApiService.java b/src/main/java/de/avatic/lcc/service/api/BatchGeoApiService.java index 540dcec..01a9c53 100644 --- a/src/main/java/de/avatic/lcc/service/api/BatchGeoApiService.java +++ b/src/main/java/de/avatic/lcc/service/api/BatchGeoApiService.java @@ -49,6 +49,7 @@ public class BatchGeoApiService { ArrayList> noGeo = new ArrayList<>(); ArrayList> failedGeoLookups = new ArrayList<>(); + ArrayList> failedFuzzyGeoLookups = new ArrayList<>(); int totalSuccessful = 0; for (var node : nodes) { @@ -57,7 +58,6 @@ public class BatchGeoApiService { } } - for (int currentBatch = 0; currentBatch < noGeo.size(); currentBatch += MAX_BATCH_SIZE) { int end = Math.min(currentBatch + MAX_BATCH_SIZE, noGeo.size()); var chunk = noGeo.subList(currentBatch, end); @@ -67,34 +67,109 @@ public class BatchGeoApiService { .toList()); if (chunkResult.isPresent()) { + var response = chunkResult.get(); - totalSuccessful += chunkResult.get().getSummary().getSuccessfulRequests(); + if (response.getSummary() != null && response.getSummary().getSuccessfulRequests() != null) { + totalSuccessful += response.getSummary().getSuccessfulRequests(); + } + + if (response.getBatchItems() == null || response.getBatchItems().isEmpty()) { + logger.warn("Batch response contains no items"); + failedGeoLookups.addAll(chunk); + continue; + } for (int itemIdx = 0; itemIdx < chunk.size(); itemIdx++) { - var result = chunkResult.get().getBatchItems().get(itemIdx); + + if (itemIdx >= response.getBatchItems().size()) { + logger.warn("BatchItems size mismatch at index {}", itemIdx); + failedGeoLookups.add(chunk.get(itemIdx)); + continue; + } + + var result = response.getBatchItems().get(itemIdx); var node = chunk.get(itemIdx).getEntity(); - if (!result.getFeatures().isEmpty() && - (result.getFeatures().getFirst().getProperties().getConfidence().equalsIgnoreCase("high") || - result.getFeatures().getFirst().getProperties().getConfidence().equalsIgnoreCase("medium") || - (result.getFeatures().getFirst().getProperties().getMatchCodes() != null && - result.getFeatures().getFirst().getProperties().getMatchCodes().stream().anyMatch(s -> s.equalsIgnoreCase("good"))))) { - var geometry = result.getFeatures().getFirst().getGeometry(); - var properties = result.getFeatures().getFirst().getProperties(); - node.setGeoLng(BigDecimal.valueOf(geometry.getCoordinates().get(0))); - node.setGeoLat(BigDecimal.valueOf(geometry.getCoordinates().get(1))); - node.setAddress(properties.getAddress().getFormattedAddress()); - node.setCountryId(IsoCode.valueOf(properties.getAddress().getCountryRegion().getIso())); - } else { - logger.warn("Geocoding failed for address {}", node.getAddress()); + + if (result == null || result.getFeatures() == null || result.getFeatures().isEmpty()) { + logger.warn("No geocoding result for address {}", + node.getAddress() != null ? node.getAddress() : "unknown"); + failedGeoLookups.add(chunk.get(itemIdx)); + continue; + } + + var feature = result.getFeatures().getFirst(); + if (feature == null) { + logger.warn("Feature is null for address {}", node.getAddress()); + failedGeoLookups.add(chunk.get(itemIdx)); + continue; + } + + var properties = feature.getProperties(); + if (properties == null) { + logger.warn("Properties is null for address {}", node.getAddress()); + failedGeoLookups.add(chunk.get(itemIdx)); + continue; + } + + String confidence = properties.getConfidence(); + boolean hasGoodConfidence = confidence != null && + (confidence.equalsIgnoreCase("high") || + confidence.equalsIgnoreCase("medium")); + + boolean hasGoodMatchCode = properties.getMatchCodes() != null && + properties.getMatchCodes().stream() + .anyMatch(s -> s != null && s.equalsIgnoreCase("good")); + + if (hasGoodConfidence || hasGoodMatchCode) { + var geometry = feature.getGeometry(); + if (geometry == null || geometry.getCoordinates() == null || + geometry.getCoordinates().size() < 2) { + logger.warn("Invalid geometry for address {}", node.getAddress()); + failedGeoLookups.add(chunk.get(itemIdx)); + continue; + } + + var coordinates = geometry.getCoordinates(); + if (coordinates.get(0) == null || coordinates.get(1) == null) { + logger.warn("Null coordinates for address {}", node.getAddress()); + failedGeoLookups.add(chunk.get(itemIdx)); + continue; + } + + node.setGeoLng(BigDecimal.valueOf(coordinates.get(0))); + node.setGeoLat(BigDecimal.valueOf(coordinates.get(1))); + + if (properties.getAddress() != null && + properties.getAddress().getFormattedAddress() != null) { + node.setAddress(properties.getAddress().getFormattedAddress()); + } + + if (properties.getAddress() != null && + properties.getAddress().getCountryRegion() != null && + properties.getAddress().getCountryRegion().getIso() != null) { + try { + node.setCountryId(IsoCode.valueOf( + properties.getAddress().getCountryRegion().getIso())); + } catch (IllegalArgumentException e) { + logger.warn("Invalid ISO code: {}", + properties.getAddress().getCountryRegion().getIso()); + } + } + } else { + logger.warn("Geocoding failed for address {} (low confidence)", + node.getAddress()); failedGeoLookups.add(chunk.get(itemIdx)); - //throw new ExcelValidationError("Unable to geocode " + node.getName() + ". Please check your address or enter geo position yourself."); } } + } else { + logger.warn("Batch request returned empty result"); + failedGeoLookups.addAll(chunk); } } + // Second pass: fuzzy lookup with company name for failed addresses if (!failedGeoLookups.isEmpty()) { logger.info("Retrying {} failed lookups with fuzzy search", failedGeoLookups.size()); @@ -108,31 +183,52 @@ public class BatchGeoApiService { && !fuzzyResult.get().getResults().isEmpty()) { var result = fuzzyResult.get().getResults().getFirst(); - - // Score >= 0.7 means good confidence (1.0 = perfect match) - if (result.getScore() >= 7.0) { - node.setGeoLat(BigDecimal.valueOf(result.getPosition().getLat())); - node.setGeoLng(BigDecimal.valueOf(result.getPosition().getLon())); - node.setAddress(result.getAddress().getFreeformAddress()); - - // Update country if it differs - if (result.getAddress().getCountryCode() != null) { - try { - node.setCountryId(IsoCode.valueOf(result.getAddress().getCountryCode())); - } catch (IllegalArgumentException e) { - logger.warn("Unknown country code: {}", result.getAddress().getCountryCode()); - } - } - - fuzzySuccessful++; - logger.info("Fuzzy search successful for: {} (score: {})", - node.getName(), result.getScore()); - } else { - logger.warn("Fuzzy search returned low confidence result for: {} (score: {})", - node.getName(), result.getScore()); + if (result == null) { + logger.warn("Fuzzy result is null for: {}", node.getName()); + failedFuzzyGeoLookups.add(instruction); + continue; } - } else { - logger.error("Fuzzy search found no results for: {}", node.getName()); + + double score = result.getScore(); + if (score < 7.0) { + logger.warn("Fuzzy search returned low confidence result for: {} (score: {})", + node.getName(), score); + failedFuzzyGeoLookups.add(instruction); + continue; + } + + if (result.getPosition() == null) { + logger.warn("Position is null for: {}", node.getName()); + failedFuzzyGeoLookups.add(instruction); + continue; + } + + double lat = result.getPosition().getLat(); + double lon = result.getPosition().getLon(); + + node.setGeoLat(BigDecimal.valueOf(lat)); + node.setGeoLng(BigDecimal.valueOf(lon)); + + if (result.getAddress() != null && + result.getAddress().getFreeformAddress() != null) { + node.setAddress(result.getAddress().getFreeformAddress()); + } + + if (result.getAddress() != null && + result.getAddress().getCountryCode() != null) { + try { + node.setCountryId(IsoCode.valueOf(result.getAddress().getCountryCode())); + } catch (IllegalArgumentException e) { + logger.warn("Unknown country code: {}", + result.getAddress().getCountryCode()); + failedFuzzyGeoLookups.add(instruction); + continue; + } + } + + fuzzySuccessful++; + logger.info("Fuzzy search successful for: {} (score: {})", + node.getName(), score); } } @@ -140,8 +236,10 @@ public class BatchGeoApiService { fuzzySuccessful, failedGeoLookups.size()); // Throw error for remaining failed lookups - int remainingFailed = failedGeoLookups.size() - fuzzySuccessful; - if (remainingFailed > 0) { + if (!failedFuzzyGeoLookups.isEmpty()) { + + failedFuzzyGeoLookups.forEach(instruction -> {logger.warn("Lookup finally failed for: {}", instruction.getEntity().getName());}); + var firstFailed = failedGeoLookups.stream() .filter(i -> i.getEntity().getGeoLat() == null) .findFirst() @@ -149,7 +247,9 @@ public class BatchGeoApiService { .orElse(null); if (firstFailed != null) { - throw new ExcelValidationError("Unable to geocode " + firstFailed.getName() + String name = firstFailed.getName() != null ? + firstFailed.getName() : "unknown"; + throw new ExcelValidationError("Unable to geocode " + name + ". Please check your address or enter geo position yourself."); } } @@ -159,13 +259,32 @@ public class BatchGeoApiService { private Optional executeFuzzySearch(ExcelNode node) { try { String companyName = node.getName(); - String country = node.getCountryId().name(); + if (companyName == null) { + logger.warn("Company name is null for fuzzy search"); + return Optional.empty(); + } + + IsoCode countryId = node.getCountryId(); + if (countryId == null) { + logger.warn("Country ID is null for fuzzy search: {}", companyName); + return Optional.empty(); + } + String country = countryId.name(); + + String address = node.getAddress(); + if (address == null) { + logger.warn("Address is null for fuzzy search: {}", companyName); + address = ""; // Fallback zu leerem String + } // Normalisiere Unicode für konsistente Suche - companyName = java.text.Normalizer.normalize(companyName, java.text.Normalizer.Form.NFC); + companyName = java.text.Normalizer.normalize(companyName, + java.text.Normalizer.Form.NFC); // URL-Encoding - String encodedQuery = URLEncoder.encode(companyName + ", " + node.getAddress() + ", " + country, StandardCharsets.UTF_8); + String encodedQuery = URLEncoder.encode( + companyName + ", " + address + ", " + country, + StandardCharsets.UTF_8); String url = String.format( "https://atlas.microsoft.com/search/fuzzy/json?api-version=1.0&subscription-key=%s&query=%s&limit=5", @@ -185,13 +304,21 @@ public class BatchGeoApiService { return Optional.ofNullable(response.getBody()); } catch (Exception e) { - logger.error("Fuzzy search failed for {}", node.getName(), e); + logger.error("Fuzzy search failed for {}", + node.getName() != null ? node.getName() : "unknown", e); return Optional.empty(); } } private String getGeoCodeString(ExcelNode excelNode) { - return excelNode.getAddress() + ", " + excelNode.getCountryId(); + String address = excelNode.getAddress(); + IsoCode countryId = excelNode.getCountryId(); + + // Fallback-Werte für null + String addressStr = address != null ? address : ""; + String countryStr = countryId != null ? countryId.name() : ""; + + return addressStr + ", " + countryStr; } private Optional executeBatchRequest(List batchItems) { diff --git a/src/main/java/de/avatic/lcc/service/bulk/BulkImportService.java b/src/main/java/de/avatic/lcc/service/bulk/BulkImportService.java index 3e0f248..84bacc5 100644 --- a/src/main/java/de/avatic/lcc/service/bulk/BulkImportService.java +++ b/src/main/java/de/avatic/lcc/service/bulk/BulkImportService.java @@ -15,6 +15,7 @@ import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; import java.io.ByteArrayInputStream; import java.io.IOException; @@ -56,6 +57,7 @@ public class BulkImportService { this.materialFastExcelMapper = materialFastExcelMapper; } + @Transactional public void processOperation(BulkOperation op) throws IOException { var file = op.getFile(); var type = op.getFileType(); diff --git a/src/main/java/de/avatic/lcc/service/bulk/bulkImport/NodeBulkImportService.java b/src/main/java/de/avatic/lcc/service/bulk/bulkImport/NodeBulkImportService.java index fa3a464..e0b1695 100644 --- a/src/main/java/de/avatic/lcc/service/bulk/bulkImport/NodeBulkImportService.java +++ b/src/main/java/de/avatic/lcc/service/bulk/bulkImport/NodeBulkImportService.java @@ -9,6 +9,7 @@ import de.avatic.lcc.service.transformer.generic.NodeTransformer; import de.avatic.lcc.util.exception.internalerror.ExcelValidationError; import org.springframework.stereotype.Service; +import java.math.BigDecimal; import java.util.*; @Service @@ -61,22 +62,26 @@ public class NodeBulkImportService { } private boolean compare(Node updateNode, Node currentNode) { - - return updateNode.getName().equals(currentNode.getName()) && - updateNode.getGeoLat().compareTo(currentNode.getGeoLat()) == 0 && - updateNode.getGeoLng().compareTo(currentNode.getGeoLng()) == 0 && - updateNode.getExternalMappingId().equals(currentNode.getExternalMappingId()) && - updateNode.getCountryId().equals(currentNode.getCountryId()) && - updateNode.getIntermediate().equals(currentNode.getIntermediate()) && - updateNode.getDestination().equals(currentNode.getDestination()) && - updateNode.getSource().equals(currentNode.getSource()) && - updateNode.getAddress().equals(currentNode.getAddress()) && - updateNode.getDeprecated().equals(currentNode.getDeprecated()) && - updateNode.getId().equals(currentNode.getId()) && - updateNode.getPredecessorRequired().equals(currentNode.getPredecessorRequired()) && + return Objects.equals(updateNode.getName(), currentNode.getName()) && + compareBigDecimal(updateNode.getGeoLat(), currentNode.getGeoLat()) && + compareBigDecimal(updateNode.getGeoLng(), currentNode.getGeoLng()) && + Objects.equals(updateNode.getExternalMappingId(), currentNode.getExternalMappingId()) && + Objects.equals(updateNode.getCountryId(), currentNode.getCountryId()) && + Objects.equals(updateNode.getIntermediate(), currentNode.getIntermediate()) && + Objects.equals(updateNode.getDestination(), currentNode.getDestination()) && + Objects.equals(updateNode.getSource(), currentNode.getSource()) && + Objects.equals(updateNode.getAddress(), currentNode.getAddress()) && + Objects.equals(updateNode.getDeprecated(), currentNode.getDeprecated()) && + Objects.equals(updateNode.getId(), currentNode.getId()) && + Objects.equals(updateNode.getPredecessorRequired(), currentNode.getPredecessorRequired()) && compare(updateNode.getNodePredecessors(), currentNode.getNodePredecessors()) && compare(updateNode.getOutboundCountries(), currentNode.getOutboundCountries()); + } + private boolean compareBigDecimal(BigDecimal a, BigDecimal b) { + if (a == null && b == null) return true; + if (a == null || b == null) return false; + return a.compareTo(b) == 0; } private boolean compare(Collection outbound1, Collection outbound2) {