mirror of
https://github.com/Freika/dawarich.git
synced 2026-01-10 01:01:39 -05:00
Fix kml kmz import issues (#2023)
* Fix kml kmz import issues * Refactor KML importer to improve readability and maintainability
This commit is contained in:
parent
bb980f2210
commit
8af032a215
7 changed files with 270 additions and 114 deletions
|
|
@ -12,10 +12,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
|
|||
|
||||
## Fixed
|
||||
|
||||
- Cities visited during a trip are now being calculated correctly. #547 #641
|
||||
- Points on the map now show time in the user's timezone. #580 #1035
|
||||
- Cities visited during a trip are now being calculated correctly. #547 #641 #1686 #1976
|
||||
- Points on the map now show time in the user's timezone. #580 #1035 #1682
|
||||
- Date range inputs now handle pre-epoch dates gracefully by clamping to valid PostgreSQL integer range. #685
|
||||
- Redis client can now also be configured to connect via a Unix socket. #1970
|
||||
- Importing KML files now creates points with correct timestamps. #1988
|
||||
- Importing KMZ files now works correctly.
|
||||
|
||||
|
||||
# [0.36.2] - 2025-12-06
|
||||
|
|
|
|||
|
|
@ -2,8 +2,6 @@
|
|||
|
||||
[](https://discord.gg/pHsBjpt5J8) | [](https://ko-fi.com/H2H3IDYDD) | [](https://www.patreon.com/freika)
|
||||
|
||||
[](https://app.circleci.com/pipelines/github/Freika/dawarich)
|
||||
|
||||
---
|
||||
|
||||
## 📸 Screenshots
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -127,6 +127,15 @@ class Imports::SourceDetector
|
|||
else
|
||||
file_content
|
||||
end
|
||||
|
||||
# Check if it's a KMZ file (ZIP archive)
|
||||
if filename&.downcase&.end_with?('.kmz')
|
||||
# KMZ files are ZIP archives, check for ZIP signature
|
||||
# ZIP files start with "PK" (0x50 0x4B)
|
||||
return content_to_check[0..1] == 'PK'
|
||||
end
|
||||
|
||||
# For KML files, check XML structure
|
||||
(
|
||||
content_to_check.strip.start_with?('<?xml') ||
|
||||
content_to_check.strip.start_with?('<kml')
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require 'rexml/document'
|
||||
require 'zip'
|
||||
|
||||
class Kml::Importer
|
||||
include Imports::Broadcaster
|
||||
|
|
@ -15,149 +16,246 @@ class Kml::Importer
|
|||
end
|
||||
|
||||
def call
|
||||
file_content = load_file_content
|
||||
doc = REXML::Document.new(file_content)
|
||||
|
||||
points_data = []
|
||||
|
||||
# Process all Placemarks which can contain various geometry types
|
||||
REXML::XPath.each(doc, '//Placemark') do |placemark|
|
||||
points_data.concat(parse_placemark(placemark))
|
||||
end
|
||||
|
||||
# Process gx:Track elements (Google Earth extensions for GPS tracks)
|
||||
REXML::XPath.each(doc, '//gx:Track') do |track|
|
||||
points_data.concat(parse_gx_track(track))
|
||||
end
|
||||
|
||||
points_data.compact!
|
||||
doc = load_and_parse_kml_document
|
||||
points_data = extract_all_points(doc)
|
||||
|
||||
return if points_data.empty?
|
||||
|
||||
# Process in batches to avoid memory issues with large files
|
||||
save_points_in_batches(points_data)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def load_and_parse_kml_document
|
||||
file_content = load_kml_content
|
||||
REXML::Document.new(file_content)
|
||||
end
|
||||
|
||||
def extract_all_points(doc)
|
||||
points_data = []
|
||||
points_data.concat(extract_points_from_placemarks(doc))
|
||||
points_data.concat(extract_points_from_gx_tracks(doc))
|
||||
points_data.compact
|
||||
end
|
||||
|
||||
def save_points_in_batches(points_data)
|
||||
points_data.each_slice(1000) do |batch|
|
||||
bulk_insert_points(batch)
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def parse_placemark(placemark)
|
||||
def extract_points_from_placemarks(doc)
|
||||
points = []
|
||||
timestamp = extract_timestamp(placemark)
|
||||
|
||||
# Handle Point geometry
|
||||
point_node = REXML::XPath.first(placemark, './/Point/coordinates')
|
||||
if point_node
|
||||
coords = parse_coordinates(point_node.text)
|
||||
points << build_point(coords.first, timestamp, placemark) if coords.any?
|
||||
REXML::XPath.each(doc, '//Placemark') do |placemark|
|
||||
points.concat(parse_placemark(placemark))
|
||||
end
|
||||
points
|
||||
end
|
||||
|
||||
# Handle LineString geometry (tracks/routes)
|
||||
linestring_node = REXML::XPath.first(placemark, './/LineString/coordinates')
|
||||
if linestring_node
|
||||
coords = parse_coordinates(linestring_node.text)
|
||||
coords.each do |coord|
|
||||
points << build_point(coord, timestamp, placemark)
|
||||
def extract_points_from_gx_tracks(doc)
|
||||
points = []
|
||||
REXML::XPath.each(doc, '//gx:Track') do |track|
|
||||
points.concat(parse_gx_track(track))
|
||||
end
|
||||
points
|
||||
end
|
||||
|
||||
def load_kml_content
|
||||
content = read_file_content
|
||||
content = ensure_binary_encoding(content)
|
||||
kmz_file?(content) ? extract_kml_from_kmz(content) : content
|
||||
end
|
||||
|
||||
def read_file_content
|
||||
if file_path && File.exist?(file_path)
|
||||
File.binread(file_path)
|
||||
else
|
||||
download_and_read_content
|
||||
end
|
||||
end
|
||||
|
||||
def download_and_read_content
|
||||
downloader_content = Imports::SecureFileDownloader.new(import.file).download_with_verification
|
||||
downloader_content.is_a?(StringIO) ? downloader_content.read : downloader_content
|
||||
end
|
||||
|
||||
def ensure_binary_encoding(content)
|
||||
content.force_encoding('BINARY') if content.respond_to?(:force_encoding)
|
||||
content
|
||||
end
|
||||
|
||||
def kmz_file?(content)
|
||||
content[0..1] == 'PK'
|
||||
end
|
||||
|
||||
def extract_kml_from_kmz(kmz_content)
|
||||
kml_content = find_kml_in_zip(kmz_content)
|
||||
raise 'No KML file found in KMZ archive' unless kml_content
|
||||
|
||||
kml_content
|
||||
rescue Zip::Error => e
|
||||
raise "Failed to extract KML from KMZ: #{e.message}"
|
||||
end
|
||||
|
||||
def find_kml_in_zip(kmz_content)
|
||||
kml_content = nil
|
||||
|
||||
Zip::InputStream.open(StringIO.new(kmz_content)) do |io|
|
||||
while (entry = io.get_next_entry)
|
||||
if kml_entry?(entry)
|
||||
kml_content = io.read
|
||||
break
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Handle MultiGeometry (can contain multiple Points, LineStrings, etc.)
|
||||
kml_content
|
||||
end
|
||||
|
||||
def kml_entry?(entry)
|
||||
entry.name.downcase.end_with?('.kml')
|
||||
end
|
||||
|
||||
def parse_placemark(placemark)
|
||||
return [] unless has_explicit_timestamp?(placemark)
|
||||
|
||||
timestamp = extract_timestamp(placemark)
|
||||
points = []
|
||||
|
||||
points.concat(extract_point_geometry(placemark, timestamp))
|
||||
points.concat(extract_linestring_geometry(placemark, timestamp))
|
||||
points.concat(extract_multigeometry(placemark, timestamp))
|
||||
|
||||
points.compact
|
||||
end
|
||||
|
||||
def extract_point_geometry(placemark, timestamp)
|
||||
point_node = REXML::XPath.first(placemark, './/Point/coordinates')
|
||||
return [] unless point_node
|
||||
|
||||
coords = parse_coordinates(point_node.text)
|
||||
coords.any? ? [build_point(coords.first, timestamp, placemark)] : []
|
||||
end
|
||||
|
||||
def extract_linestring_geometry(placemark, timestamp)
|
||||
linestring_node = REXML::XPath.first(placemark, './/LineString/coordinates')
|
||||
return [] unless linestring_node
|
||||
|
||||
coords = parse_coordinates(linestring_node.text)
|
||||
coords.map { |coord| build_point(coord, timestamp, placemark) }
|
||||
end
|
||||
|
||||
def extract_multigeometry(placemark, timestamp)
|
||||
points = []
|
||||
REXML::XPath.each(placemark, './/MultiGeometry//coordinates') do |coords_node|
|
||||
coords = parse_coordinates(coords_node.text)
|
||||
coords.each do |coord|
|
||||
points << build_point(coord, timestamp, placemark)
|
||||
end
|
||||
end
|
||||
|
||||
points.compact
|
||||
points
|
||||
end
|
||||
|
||||
def parse_gx_track(track)
|
||||
# Google Earth Track extension with coordinated when/coord pairs
|
||||
points = []
|
||||
timestamps = extract_gx_timestamps(track)
|
||||
coordinates = extract_gx_coordinates(track)
|
||||
|
||||
build_gx_track_points(timestamps, coordinates)
|
||||
end
|
||||
|
||||
def extract_gx_timestamps(track)
|
||||
timestamps = []
|
||||
REXML::XPath.each(track, './/when') do |when_node|
|
||||
timestamps << when_node.text.strip
|
||||
end
|
||||
timestamps
|
||||
end
|
||||
|
||||
def extract_gx_coordinates(track)
|
||||
coordinates = []
|
||||
REXML::XPath.each(track, './/gx:coord') do |coord_node|
|
||||
coordinates << coord_node.text.strip
|
||||
end
|
||||
coordinates
|
||||
end
|
||||
|
||||
# Match timestamps with coordinates
|
||||
[timestamps.size, coordinates.size].min.times do |i|
|
||||
begin
|
||||
time = Time.parse(timestamps[i]).to_i
|
||||
coord_parts = coordinates[i].split(/\s+/)
|
||||
next if coord_parts.size < 2
|
||||
def build_gx_track_points(timestamps, coordinates)
|
||||
points = []
|
||||
min_size = [timestamps.size, coordinates.size].min
|
||||
|
||||
lng, lat, alt = coord_parts.map(&:to_f)
|
||||
|
||||
points << {
|
||||
lonlat: "POINT(#{lng} #{lat})",
|
||||
altitude: alt&.to_i || 0,
|
||||
timestamp: time,
|
||||
import_id: import.id,
|
||||
velocity: 0.0,
|
||||
raw_data: { source: 'gx_track', index: i },
|
||||
user_id: user_id,
|
||||
created_at: Time.current,
|
||||
updated_at: Time.current
|
||||
}
|
||||
rescue StandardError => e
|
||||
Rails.logger.warn("Failed to parse gx:Track point at index #{i}: #{e.message}")
|
||||
next
|
||||
end
|
||||
min_size.times do |i|
|
||||
point = build_gx_track_point(timestamps[i], coordinates[i], i)
|
||||
points << point if point
|
||||
end
|
||||
|
||||
points
|
||||
end
|
||||
|
||||
def build_gx_track_point(timestamp_str, coord_str, index)
|
||||
time = Time.parse(timestamp_str).to_i
|
||||
coord_parts = coord_str.split(/\s+/)
|
||||
return nil if coord_parts.size < 2
|
||||
|
||||
lng, lat, alt = coord_parts.map(&:to_f)
|
||||
|
||||
{
|
||||
lonlat: "POINT(#{lng} #{lat})",
|
||||
altitude: alt&.to_i || 0,
|
||||
timestamp: time,
|
||||
import_id: import.id,
|
||||
velocity: 0.0,
|
||||
raw_data: { source: 'gx_track', index: index },
|
||||
user_id: user_id,
|
||||
created_at: Time.current,
|
||||
updated_at: Time.current
|
||||
}
|
||||
rescue StandardError => e
|
||||
Rails.logger.warn("Failed to parse gx:Track point at index #{index}: #{e.message}")
|
||||
nil
|
||||
end
|
||||
|
||||
def parse_coordinates(coord_text)
|
||||
# KML coordinates format: "longitude,latitude[,altitude] ..."
|
||||
# Multiple coordinates separated by whitespace
|
||||
return [] if coord_text.blank?
|
||||
|
||||
coord_text.strip.split(/\s+/).map do |coord_str|
|
||||
parts = coord_str.split(',')
|
||||
next if parts.size < 2
|
||||
coord_text.strip.split(/\s+/).map { |coord_str| parse_single_coordinate(coord_str) }.compact
|
||||
end
|
||||
|
||||
{
|
||||
lng: parts[0].to_f,
|
||||
lat: parts[1].to_f,
|
||||
alt: parts[2]&.to_f || 0.0
|
||||
}
|
||||
end.compact
|
||||
def parse_single_coordinate(coord_str)
|
||||
parts = coord_str.split(',')
|
||||
return nil if parts.size < 2
|
||||
|
||||
{
|
||||
lng: parts[0].to_f,
|
||||
lat: parts[1].to_f,
|
||||
alt: parts[2]&.to_f || 0.0
|
||||
}
|
||||
end
|
||||
|
||||
def has_explicit_timestamp?(placemark)
|
||||
find_timestamp_node(placemark).present?
|
||||
end
|
||||
|
||||
def extract_timestamp(placemark)
|
||||
# Try TimeStamp first
|
||||
timestamp_node = REXML::XPath.first(placemark, './/TimeStamp/when')
|
||||
return Time.parse(timestamp_node.text).to_i if timestamp_node
|
||||
node = find_timestamp_node(placemark)
|
||||
raise 'No timestamp found in placemark' unless node
|
||||
|
||||
# Try TimeSpan begin
|
||||
timespan_begin = REXML::XPath.first(placemark, './/TimeSpan/begin')
|
||||
return Time.parse(timespan_begin.text).to_i if timespan_begin
|
||||
|
||||
# Try TimeSpan end as fallback
|
||||
timespan_end = REXML::XPath.first(placemark, './/TimeSpan/end')
|
||||
return Time.parse(timespan_end.text).to_i if timespan_end
|
||||
|
||||
# Default to import creation time if no timestamp found
|
||||
import.created_at.to_i
|
||||
Time.parse(node.text).to_i
|
||||
rescue StandardError => e
|
||||
Rails.logger.warn("Failed to parse timestamp: #{e.message}")
|
||||
import.created_at.to_i
|
||||
Rails.logger.error("Failed to parse timestamp: #{e.message}")
|
||||
raise e
|
||||
end
|
||||
|
||||
def find_timestamp_node(placemark)
|
||||
REXML::XPath.first(placemark, './/TimeStamp/when') ||
|
||||
REXML::XPath.first(placemark, './/TimeSpan/begin') ||
|
||||
REXML::XPath.first(placemark, './/TimeSpan/end')
|
||||
end
|
||||
|
||||
def build_point(coord, timestamp, placemark)
|
||||
return if coord[:lat].blank? || coord[:lng].blank?
|
||||
return if invalid_coordinates?(coord)
|
||||
|
||||
{
|
||||
lonlat: "POINT(#{coord[:lng]} #{coord[:lat]})",
|
||||
lonlat: format_point_geometry(coord),
|
||||
altitude: coord[:alt].to_i,
|
||||
timestamp: timestamp,
|
||||
import_id: import.id,
|
||||
|
|
@ -169,31 +267,52 @@ class Kml::Importer
|
|||
}
|
||||
end
|
||||
|
||||
def invalid_coordinates?(coord)
|
||||
coord[:lat].blank? || coord[:lng].blank?
|
||||
end
|
||||
|
||||
def format_point_geometry(coord)
|
||||
"POINT(#{coord[:lng]} #{coord[:lat]})"
|
||||
end
|
||||
|
||||
def extract_velocity(placemark)
|
||||
# Try to extract speed from ExtendedData
|
||||
speed_node = REXML::XPath.first(placemark, ".//Data[@name='speed']/value") ||
|
||||
REXML::XPath.first(placemark, ".//Data[@name='Speed']/value") ||
|
||||
REXML::XPath.first(placemark, ".//Data[@name='velocity']/value")
|
||||
|
||||
return speed_node.text.to_f.round(1) if speed_node
|
||||
|
||||
0.0
|
||||
speed_node = find_speed_node(placemark)
|
||||
speed_node ? speed_node.text.to_f.round(1) : 0.0
|
||||
rescue StandardError
|
||||
0.0
|
||||
end
|
||||
|
||||
def find_speed_node(placemark)
|
||||
REXML::XPath.first(placemark, ".//Data[@name='speed']/value") ||
|
||||
REXML::XPath.first(placemark, ".//Data[@name='Speed']/value") ||
|
||||
REXML::XPath.first(placemark, ".//Data[@name='velocity']/value")
|
||||
end
|
||||
|
||||
def extract_extended_data(placemark)
|
||||
data = {}
|
||||
data.merge!(extract_name_and_description(placemark))
|
||||
data.merge!(extract_custom_data_fields(placemark))
|
||||
data
|
||||
rescue StandardError => e
|
||||
Rails.logger.warn("Failed to extract extended data: #{e.message}")
|
||||
{}
|
||||
end
|
||||
|
||||
def extract_name_and_description(placemark)
|
||||
data = {}
|
||||
|
||||
# Extract name if present
|
||||
name_node = REXML::XPath.first(placemark, './/name')
|
||||
data['name'] = name_node.text.strip if name_node
|
||||
|
||||
# Extract description if present
|
||||
desc_node = REXML::XPath.first(placemark, './/description')
|
||||
data['description'] = desc_node.text.strip if desc_node
|
||||
|
||||
# Extract all ExtendedData/Data elements
|
||||
data
|
||||
end
|
||||
|
||||
def extract_custom_data_fields(placemark)
|
||||
data = {}
|
||||
|
||||
REXML::XPath.each(placemark, './/ExtendedData/Data') do |data_node|
|
||||
name = data_node.attributes['name']
|
||||
value_node = REXML::XPath.first(data_node, './value')
|
||||
|
|
@ -201,26 +320,29 @@ class Kml::Importer
|
|||
end
|
||||
|
||||
data
|
||||
rescue StandardError => e
|
||||
Rails.logger.warn("Failed to extract extended data: #{e.message}")
|
||||
{}
|
||||
end
|
||||
|
||||
def bulk_insert_points(batch)
|
||||
unique_batch = batch.uniq { |record| [record[:lonlat], record[:timestamp], record[:user_id]] }
|
||||
unique_batch = deduplicate_batch(batch)
|
||||
upsert_points(unique_batch)
|
||||
broadcast_import_progress(import, unique_batch.size)
|
||||
rescue StandardError => e
|
||||
create_notification("Failed to process KML file: #{e.message}")
|
||||
end
|
||||
|
||||
def deduplicate_batch(batch)
|
||||
batch.uniq { |record| [record[:lonlat], record[:timestamp], record[:user_id]] }
|
||||
end
|
||||
|
||||
def upsert_points(batch)
|
||||
# rubocop:disable Rails/SkipsModelValidations
|
||||
Point.upsert_all(
|
||||
unique_batch,
|
||||
batch,
|
||||
unique_by: %i[lonlat timestamp user_id],
|
||||
returning: false,
|
||||
on_duplicate: :skip
|
||||
)
|
||||
# rubocop:enable Rails/SkipsModelValidations
|
||||
|
||||
broadcast_import_progress(import, unique_batch.size)
|
||||
rescue StandardError => e
|
||||
create_notification("Failed to process KML file: #{e.message}")
|
||||
end
|
||||
|
||||
def create_notification(message)
|
||||
|
|
|
|||
BIN
spec/fixtures/files/kml/points_with_timestamps.kmz
vendored
Normal file
BIN
spec/fixtures/files/kml/points_with_timestamps.kmz
vendored
Normal file
Binary file not shown.
|
|
@ -142,6 +142,31 @@ RSpec.describe Kml::Importer do
|
|||
end
|
||||
end
|
||||
|
||||
context 'when importing KMZ file (compressed KML)' do
|
||||
let(:file_path) { Rails.root.join('spec/fixtures/files/kml/points_with_timestamps.kmz').to_s }
|
||||
|
||||
it 'extracts and processes KML from KMZ archive' do
|
||||
expect { parser }.to change(Point, :count).by(3)
|
||||
end
|
||||
|
||||
it 'creates points with correct data from extracted KML' do
|
||||
parser
|
||||
|
||||
point = user.points.order(:timestamp).first
|
||||
|
||||
expect(point.lat).to eq(37.4220)
|
||||
expect(point.lon).to eq(-122.0841)
|
||||
expect(point.altitude).to eq(10)
|
||||
expect(point.timestamp).to eq(Time.zone.parse('2024-01-15T12:00:00Z').to_i)
|
||||
end
|
||||
|
||||
it 'broadcasts importing progress' do
|
||||
expect_any_instance_of(Imports::Broadcaster).to receive(:broadcast_import_progress).at_least(1).time
|
||||
|
||||
parser
|
||||
end
|
||||
end
|
||||
|
||||
context 'when import fails' do
|
||||
let(:file_path) { Rails.root.join('spec/fixtures/files/kml/points_with_timestamps.kml').to_s }
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue