diff --git a/src/main/java/de/avatic/lcc/model/azuremaps/geocoding/fuzzy/FuzzySearchResponse.java b/src/main/java/de/avatic/lcc/model/azuremaps/geocoding/fuzzy/FuzzySearchResponse.java
new file mode 100644
index 0000000..19866a8
--- /dev/null
+++ b/src/main/java/de/avatic/lcc/model/azuremaps/geocoding/fuzzy/FuzzySearchResponse.java
@@ -0,0 +1,62 @@
+package de.avatic.lcc.model.azuremaps.geocoding.fuzzy;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import java.util.List;
+
+/**
+ * Response body of the Azure Maps fuzzy search endpoint as consumed by {@code BatchGeoApiService}.
+ * Properties that are not modelled here are ignored during deserialization.
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class FuzzySearchResponse {
+    private Summary summary;
+    private List<FuzzySearchResult> results;
+
+    public Summary getSummary() {
+        return summary;
+    }
+
+    public void setSummary(Summary summary) {
+        this.summary = summary;
+    }
+
+    public List<FuzzySearchResult> getResults() {
+        return results;
+    }
+
+    public void setResults(List<FuzzySearchResult> results) {
+        this.results = results;
+    }
+
+    /** Summary section of the response: the query and the result counts. */
+    @JsonIgnoreProperties(ignoreUnknown = true)
+    public static class Summary {
+        private String query;
+        private int numResults;
+        private int totalResults;
+
+        public String getQuery() {
+            return query;
+        }
+
+        public void setQuery(String query) {
+            this.query = query;
+        }
+
+        public int getNumResults() {
+            return numResults;
+        }
+
+        public void setNumResults(int numResults) {
+            this.numResults = numResults;
+        }
+
+        public int getTotalResults() {
+            return totalResults;
+        }
+
+        public void setTotalResults(int totalResults) {
+            this.totalResults = totalResults;
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/de/avatic/lcc/model/azuremaps/geocoding/fuzzy/FuzzySearchResult.java b/src/main/java/de/avatic/lcc/model/azuremaps/geocoding/fuzzy/FuzzySearchResult.java
new file mode 100644
index 0000000..3bd0fa0
--- /dev/null
+++ b/src/main/java/de/avatic/lcc/model/azuremaps/geocoding/fuzzy/FuzzySearchResult.java
@@ -0,0 +1,156 @@
+package de.avatic.lcc.model.azuremaps.geocoding.fuzzy;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+
+/**
+ * One entry of the {@code results} list of a {@link FuzzySearchResponse}.
+ * Properties that are not modelled here are ignored during deserialization.
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class FuzzySearchResult {
+    private String type;
+    private double score;
+    private Position position;
+    private Address address;
+    private String entityType;
+
+    public String getType() {
+        return type;
+    }
+
+    public void setType(String type) {
+        this.type = type;
+    }
+
+    public double getScore() {
+        return score;
+    }
+
+    public void setScore(double score) {
+        this.score = score;
+    }
+
+    public Position getPosition() {
+        return position;
+    }
+
+    public void setPosition(Position position) {
+        this.position = position;
+    }
+
+    public Address getAddress() {
+        return address;
+    }
+
+    public void setAddress(Address address) {
+        this.address = address;
+    }
+
+    public String getEntityType() {
+        return entityType;
+    }
+
+    public void setEntityType(String entityType) {
+        this.entityType = entityType;
+    }
+
+    /** Geographic position of a result as latitude/longitude. */
+    @JsonIgnoreProperties(ignoreUnknown = true)
+    public static class Position {
+        private double lat;
+        private double lon;
+
+        public double getLat() {
+            return lat;
+        }
+
+        public void setLat(double lat) {
+            this.lat = lat;
+        }
+
+        public double getLon() {
+            return lon;
+        }
+
+        public void setLon(double lon) {
+            this.lon = lon;
+        }
+    }
+
+    /** Address details of a result. */
+    @JsonIgnoreProperties(ignoreUnknown = true)
+    public static class Address {
+        private String freeformAddress;
+        private String countryCode;
+        private String countryCodeISO3;
+        private String country;
+        private String municipality;
+        private String postalCode;
+        private String streetName;
+        private String streetNumber;
+
+        public String getFreeformAddress() {
+            return freeformAddress;
+        }
+
+        public void setFreeformAddress(String freeformAddress) {
+            this.freeformAddress = freeformAddress;
+        }
+
+        public String getCountryCode() {
+            return countryCode;
+        }
+
+        public void setCountryCode(String countryCode) {
+            this.countryCode = countryCode;
+        }
+
+        public String getCountryCodeISO3() {
+            return countryCodeISO3;
+        }
+
+        public void setCountryCodeISO3(String countryCodeISO3) {
+            this.countryCodeISO3 = countryCodeISO3;
+        }
+
+        public String getCountry() {
+            return country;
+        }
+
+        public void setCountry(String country) {
+            this.country = country;
+        }
+
+        public String getMunicipality() {
+            return municipality;
+        }
+
+        public void setMunicipality(String municipality) {
+            this.municipality = municipality;
+        }
+
+        public String getPostalCode() {
+            return postalCode;
+        }
+
+        public void setPostalCode(String postalCode) {
+            this.postalCode = postalCode;
+        }
+
+        public String getStreetName() {
+            return streetName;
+        }
+
+        public void setStreetName(String streetName) {
+            this.streetName = streetName;
+        }
+
+        public String getStreetNumber() {
+            return streetNumber;
+        }
+
+        public void setStreetNumber(String streetNumber) {
+            this.streetNumber = streetNumber;
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/de/avatic/lcc/service/api/BatchGeoApiService.java b/src/main/java/de/avatic/lcc/service/api/BatchGeoApiService.java
index a0d0cd8..540dcec 100644
--- a/src/main/java/de/avatic/lcc/service/api/BatchGeoApiService.java
+++ b/src/main/java/de/avatic/lcc/service/api/BatchGeoApiService.java
@@ -1,11 +1,12 @@
 package de.avatic.lcc.service.api;
 
-import de.avatic.lcc.model.excel.ExcelNode;
 import de.avatic.lcc.model.azuremaps.geocoding.batch.BatchGeocodingRequest;
 import de.avatic.lcc.model.azuremaps.geocoding.batch.BatchGeocodingResponse;
 import de.avatic.lcc.model.azuremaps.geocoding.batch.BatchItem;
+import de.avatic.lcc.model.azuremaps.geocoding.fuzzy.FuzzySearchResponse;
 import de.avatic.lcc.model.bulk.BulkInstruction;
 import de.avatic.lcc.model.db.country.IsoCode;
+import de.avatic.lcc.model.excel.ExcelNode;
 import de.avatic.lcc.util.exception.internalerror.ExcelValidationError;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -17,6 +18,8 @@ import org.springframework.web.util.UriComponentsBuilder;
 
 import java.math.BigDecimal;
 import java.net.URI;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Optional;
@@ -45,6 +48,7 @@ public class BatchGeoApiService {
         }
 
         ArrayList<BulkInstruction<ExcelNode>> noGeo = new ArrayList<>();
+        ArrayList<BulkInstruction<ExcelNode>> failedGeoLookups = new ArrayList<>();
         int totalSuccessful = 0;
 
         for (var node : nodes) {
@@ -53,6 +57,7 @@ public class BatchGeoApiService {
             }
         }
 
+
         for (int currentBatch = 0; currentBatch < noGeo.size(); currentBatch += MAX_BATCH_SIZE) {
             int end = Math.min(currentBatch + MAX_BATCH_SIZE, noGeo.size());
             var chunk = noGeo.subList(currentBatch, end);
@@ -73,7 +78,8 @@ public class BatchGeoApiService {
                     if (!result.getFeatures().isEmpty() &&
                             (result.getFeatures().getFirst().getProperties().getConfidence().equalsIgnoreCase("high") ||
                                     result.getFeatures().getFirst().getProperties().getConfidence().equalsIgnoreCase("medium") ||
-                                    result.getFeatures().getFirst().getProperties().getMatchCodes().stream().anyMatch(s -> s.equalsIgnoreCase("good")))) {
+                                    (result.getFeatures().getFirst().getProperties().getMatchCodes() != null &&
+                                            result.getFeatures().getFirst().getProperties().getMatchCodes().stream().anyMatch(s -> s.equalsIgnoreCase("good"))))) {
                         var geometry = result.getFeatures().getFirst().getGeometry();
                         var properties = result.getFeatures().getFirst().getProperties();
                         node.setGeoLng(BigDecimal.valueOf(geometry.getCoordinates().get(0)));
@@ -82,11 +88,137 @@ public class BatchGeoApiService {
                         node.setCountryId(IsoCode.valueOf(properties.getAddress().getCountryRegion().getIso()));
                     } else {
                         logger.warn("Geocoding failed for address {}", node.getAddress());
-                        throw new ExcelValidationError("Unable to geocode " + node.getName() + ". Please check your address or enter geo position yourself.");
+                        // Do not abort here; the node is retried by the fuzzy pass below.
+                        failedGeoLookups.add(chunk.get(itemIdx));
                     }
                 }
             }
         }
+
+        // Second pass: fuzzy lookup with company name for failed addresses
+        if (!failedGeoLookups.isEmpty()) {
+            logger.info("Retrying {} failed lookups with fuzzy search", failedGeoLookups.size());
+
+            // Track unresolved nodes explicitly instead of inferring failure from a null latitude.
+            List<ExcelNode> unresolved = new ArrayList<>();
+            for (var instruction : failedGeoLookups) {
+                var node = instruction.getEntity();
+                if (!geocodeWithFuzzySearch(node)) {
+                    unresolved.add(node);
+                }
+            }
+
+            logger.info("Fuzzy lookup recovered {} of {} failed addresses",
+                    failedGeoLookups.size() - unresolved.size(), failedGeoLookups.size());
+
+            // Throw error for remaining failed lookups
+            if (!unresolved.isEmpty()) {
+                throw new ExcelValidationError("Unable to geocode " + unresolved.getFirst().getName() +
+                        ". Please check your address or enter geo position yourself.");
+            }
+        }
+    }
+
+    /**
+     * Tries to geocode a single node through fuzzy search. Only the first result is considered;
+     * on success its coordinates and, when present, its address and country are written to the node.
+     *
+     * @param node node whose batch geocoding lookup failed
+     * @return {@code true} if coordinates were set on the node, {@code false} otherwise
+     */
+    private boolean geocodeWithFuzzySearch(ExcelNode node) {
+        var results = executeFuzzySearch(node)
+                .map(FuzzySearchResponse::getResults)
+                .orElse(null);
+        if (results == null || results.isEmpty()) {
+            logger.error("Fuzzy search found no results for: {}", node.getName());
+            return false;
+        }
+
+        var result = results.getFirst();
+
+        // Accept only results scoring at least 7.0.
+        // NOTE(review): an earlier comment here described a 0..1 scale (0.7 threshold). Azure Maps documents
+        // the score as a relative value that is not normalized to 0..1 - confirm 7.0 is the intended cut-off.
+        boolean confident = result.getScore() >= 7.0;
+        if (!confident) {
+            logger.warn("Fuzzy search returned low confidence result for: {} (score: {})",
+                    node.getName(), result.getScore());
+            return false;
+        }
+
+        // A hit without coordinates cannot geocode the node; report it instead of failing on a null.
+        var position = result.getPosition();
+        if (position == null) {
+            logger.warn("Fuzzy search result has no position for: {}", node.getName());
+            return false;
+        }
+
+        node.setGeoLat(BigDecimal.valueOf(position.getLat()));
+        node.setGeoLng(BigDecimal.valueOf(position.getLon()));
+
+        var address = result.getAddress();
+        if (address != null) {
+            // Keep the address from the sheet when the result carries no freeform address.
+            if (address.getFreeformAddress() != null) {
+                node.setAddress(address.getFreeformAddress());
+            }
+
+            // Update country if it differs
+            if (address.getCountryCode() != null) {
+                try {
+                    node.setCountryId(IsoCode.valueOf(address.getCountryCode()));
+                } catch (IllegalArgumentException e) {
+                    logger.warn("Unknown country code: {}", address.getCountryCode());
+                }
+            }
+        }
+
+        logger.info("Fuzzy search successful for: {} (score: {})",
+                node.getName(), result.getScore());
+        return true;
+    }
+
+    /**
+     * Calls the Azure Maps fuzzy search endpoint with a query built from the node's name,
+     * address and country.
+     *
+     * @param node node to look up
+     * @return the response body, or an empty Optional if the body is missing or any exception
+     *         occurs while building or executing the request
+     */
+    private Optional<FuzzySearchResponse> executeFuzzySearch(ExcelNode node) {
+        try {
+            String companyName = node.getName();
+            String country = node.getCountryId().name();
+
+            // Normalize Unicode for a consistent search
+            companyName = java.text.Normalizer.normalize(companyName, java.text.Normalizer.Form.NFC);
+
+            // URL-Encoding
+            String encodedQuery = URLEncoder.encode(companyName + ", " + node.getAddress() + ", " + country, StandardCharsets.UTF_8);
+
+            String url = String.format(
+                    "https://atlas.microsoft.com/search/fuzzy/json?api-version=1.0&subscription-key=%s&query=%s&limit=5",
+                    subscriptionKey,
+                    encodedQuery
+            );
+
+            URI uri = URI.create(url);
+
+            logger.debug("Fuzzy search for: {} (normalized & encoded)", companyName);
+
+            ResponseEntity<FuzzySearchResponse> response = restTemplate.getForEntity(
+                    uri,
+                    FuzzySearchResponse.class
+            );
+
+            return Optional.ofNullable(response.getBody());
+
+        } catch (Exception e) {
+            logger.error("Fuzzy search failed for {}", node.getName(), e);
+            return Optional.empty();
+        }
     }
 
     private String getGeoCodeString(ExcelNode excelNode) {
diff --git a/src/main/java/de/avatic/lcc/service/excelMapper/NodeExcelMapper.java b/src/main/java/de/avatic/lcc/service/excelMapper/NodeExcelMapper.java
index 660e281..62565c3 100644
--- a/src/main/java/de/avatic/lcc/service/excelMapper/NodeExcelMapper.java
+++ b/src/main/java/de/avatic/lcc/service/excelMapper/NodeExcelMapper.java
@@ -140,26 +140,71 @@ public class NodeExcelMapper {
 
         validateConstraints(row);
 
-        entity.setExternalMappingId(row.getCell(NodeHeader.MAPPING_ID.ordinal()).getStringCellValue());
-        entity.setName(row.getCell(NodeHeader.NAME.ordinal()).getStringCellValue());
-        entity.setAddress(row.getCell(NodeHeader.ADDRESS.ordinal()).getStringCellValue());
-        entity.setCountryId(IsoCode.valueOf(row.getCell(NodeHeader.COUNTRY.ordinal()).getStringCellValue()));
+        entity.setExternalMappingId(getCellValueAsString(row.getCell(NodeHeader.MAPPING_ID.ordinal())));
+        entity.setName(getCellValueAsString(row.getCell(NodeHeader.NAME.ordinal())));
+        entity.setAddress(getCellValueAsString(row.getCell(NodeHeader.ADDRESS.ordinal())));
+        entity.setCountryId(IsoCode.valueOf(getCellValueAsString(row.getCell(NodeHeader.COUNTRY.ordinal()))));
         entity.setGeoLat(mapGeoCoordinate(CellUtil.getCell(row, NodeHeader.GEO_LATITUDE.ordinal())));
         entity.setGeoLng(mapGeoCoordinate(CellUtil.getCell(row, NodeHeader.GEO_LONGITUDE.ordinal())));
-        entity.setSource(Boolean.valueOf(row.getCell(NodeHeader.IS_SOURCE.ordinal()).getStringCellValue()));
-        entity.setIntermediate(Boolean.valueOf(row.getCell(NodeHeader.IS_INTERMEDIATE.ordinal()).getStringCellValue()));
-        entity.setDestination(Boolean.valueOf(row.getCell(NodeHeader.IS_DESTINATION.ordinal()).getStringCellValue()));
+        entity.setSource(Boolean.valueOf(getCellValueAsString(row.getCell(NodeHeader.IS_SOURCE.ordinal()))));
+        entity.setIntermediate(Boolean.valueOf(getCellValueAsString(row.getCell(NodeHeader.IS_INTERMEDIATE.ordinal()))));
+        entity.setDestination(Boolean.valueOf(getCellValueAsString(row.getCell(NodeHeader.IS_DESTINATION.ordinal()))));
 
         if(!entity.getSource() && !entity.getDestination() && !entity.getIntermediate())
-            throw new ExcelValidationError("Unable to validate row " + (row.getRowNum() + 1) + " column " + toExcelLetter(ContainerRateHeader.FROM_NODE.ordinal()) + ": Node with mapping id " + row.getCell(NodeHeader.MAPPING_ID.ordinal()).getStringCellValue() + " must be either source, destination or intermediate");
+            throw new ExcelValidationError("Unable to validate row " + (row.getRowNum() + 1) + " column " + toExcelLetter(ContainerRateHeader.FROM_NODE.ordinal()) + ": Node with mapping id " + getCellValueAsString(row.getCell(NodeHeader.MAPPING_ID.ordinal())) + " must be either source, destination or intermediate");
+        entity.setPredecessorRequired(Boolean.valueOf(getCellValueAsString(row.getCell(NodeHeader.IS_PREDECESSOR_MANDATORY.ordinal()))));
+        entity.setNodePredecessors(mapChainsFromCell(getCellValueAsString(CellUtil.getCell(row, NodeHeader.PREDECESSOR_NODES.ordinal()))));
+        entity.setOutboundCountries(mapOutboundCountriesFromCell(getCellValueAsString(CellUtil.getCell(row, NodeHeader.OUTBOUND_COUNTRIES.ordinal()))));
 
-        entity.setPredecessorRequired(Boolean.valueOf(row.getCell(NodeHeader.IS_PREDECESSOR_MANDATORY.ordinal()).getStringCellValue()));
-        entity.setNodePredecessors(mapChainsFromCell(CellUtil.getCell(row, NodeHeader.PREDECESSOR_NODES.ordinal()).getStringCellValue()));
-        entity.setOutboundCountries(mapOutboundCountriesFromCell(CellUtil.getCell(row, NodeHeader.OUTBOUND_COUNTRIES.ordinal()).getStringCellValue()));
 
-        return new BulkInstruction<>(entity, BulkInstructionType.valueOf(row.getCell(NodeHeader.OPERATION.ordinal()).getStringCellValue()));
+        return new BulkInstruction<>(entity, BulkInstructionType.valueOf(getCellValueAsString(row.getCell(NodeHeader.OPERATION.ordinal()))));
+    }
+
+    /**
+     * Extracts string value from cell with proper handling of different cell types and encoding.
+     * Formula cells are read through their cached result rather than their formula text.
+     *
+     * @param cell cell to read, may be {@code null}
+     * @return {@code null} for a {@code null} cell, otherwise the textual value; an empty string
+     *         for cell types that have no branch below
+     */
+    private String getCellValueAsString(Cell cell) {
+        if (cell == null) {
+            return null;
+        }
+
+        return switch (cell.getCellType()) {
+            case STRING -> textValue(cell);
+            case NUMERIC -> numericValue(cell);
+            case BOOLEAN -> String.valueOf(cell.getBooleanCellValue());
+            // getCellFormula() would yield the formula source text, not the value shown in the sheet.
+            case FORMULA -> switch (cell.getCachedFormulaResultType()) {
+                case STRING -> textValue(cell);
+                case NUMERIC -> numericValue(cell);
+                case BOOLEAN -> String.valueOf(cell.getBooleanCellValue());
+                default -> "";
+            };
+            default -> "";
+        };
+    }
+
+    /** Returns the cell's string value, NFC-normalized and trimmed. */
+    private String textValue(Cell cell) {
+        return java.text.Normalizer.normalize(cell.getStringCellValue(), java.text.Normalizer.Form.NFC).trim();
+    }
+
+    /**
+     * Renders a numeric cell (or numeric formula result). Date-formatted cells keep the
+     * {@code Date.toString()} representation; other numbers are printed in plain notation without
+     * trailing zeros, so an id typed as 1001 reads "1001" instead of "1001.0".
+     */
+    private String numericValue(Cell cell) {
+        if (DateUtil.isCellDateFormatted(cell)) {
+            return cell.getDateCellValue().toString();
+        }
+        return java.math.BigDecimal.valueOf(cell.getNumericCellValue()).stripTrailingZeros().toPlainString();
     }