Fix kml kmz import issues (#2023)

* Fix kml kmz import issues

* Refactor KML importer to improve readability and maintainability
This commit is contained in:
Evgenii Burmakin 2025-12-09 19:37:27 +01:00 committed by GitHub
parent bb980f2210
commit 8af032a215
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 270 additions and 114 deletions

View file

@ -12,10 +12,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
## Fixed
- Cities visited during a trip are now being calculated correctly. #547 #641
- Points on the map now show time in the user's timezone. #580 #1035
- Cities visited during a trip are now being calculated correctly. #547 #641 #1686 #1976
- Points on the map now show time in the user's timezone. #580 #1035 #1682
- Date range inputs now handle pre-epoch dates gracefully by clamping to valid PostgreSQL integer range. #685
- The Redis client is now also configured so that it can connect via a Unix socket. #1970
- Importing KML files now creates points with correct timestamps. #1988
- Importing KMZ files now works correctly.
# [0.36.2] - 2025-12-06

View file

@ -2,8 +2,6 @@
[![Discord](https://dcbadge.limes.pink/api/server/pHsBjpt5J8)](https://discord.gg/pHsBjpt5J8) | [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/H2H3IDYDD) | [![Patreon](https://img.shields.io/endpoint.svg?url=https%3A%2F%2Fshieldsio-patreon.vercel.app%2Fapi%3Fusername%3Dfreika%26type%3Dpatrons&style=for-the-badge)](https://www.patreon.com/freika)
[![CircleCI](https://circleci.com/gh/Freika/dawarich.svg?style=svg)](https://app.circleci.com/pipelines/github/Freika/dawarich)
---
## 📸 Screenshots

File diff suppressed because one or more lines are too long

View file

@ -127,6 +127,15 @@ class Imports::SourceDetector
else
file_content
end
# Check if it's a KMZ file (ZIP archive)
if filename&.downcase&.end_with?('.kmz')
# KMZ files are ZIP archives, check for ZIP signature
# ZIP files start with "PK" (0x50 0x4B)
return content_to_check[0..1] == 'PK'
end
# For KML files, check XML structure
(
content_to_check.strip.start_with?('<?xml') ||
content_to_check.strip.start_with?('<kml')

View file

@ -1,6 +1,7 @@
# frozen_string_literal: true
require 'rexml/document'
require 'zip'
class Kml::Importer
include Imports::Broadcaster
@ -15,149 +16,246 @@ class Kml::Importer
end
# Entry point: parses the KML/KMZ document once, collects point attributes
# from it, and persists them in batches.
#
# Returns nil without saving anything when the document yields no points.
def call
  points_data = extract_all_points(load_and_parse_kml_document)
  return if points_data.empty?

  # Process in batches to avoid memory issues with large files
  save_points_in_batches(points_data)
end
private
# Loads the KML text (see load_kml_content) and parses it into a
# REXML::Document.
def load_and_parse_kml_document
  REXML::Document.new(load_kml_content)
end
# Collects point attribute hashes from Placemarks first, then from gx:Track
# elements, and drops nil entries from the combined list.
def extract_all_points(doc)
  placemark_points = extract_points_from_placemarks(doc)
  track_points = extract_points_from_gx_tracks(doc)

  (placemark_points + track_points).compact
end
# Hands the points to bulk_insert_points in slices of at most 1000 records.
def save_points_in_batches(points_data)
  points_data.each_slice(1000) { |chunk| bulk_insert_points(chunk) }
end
private
def parse_placemark(placemark)
def extract_points_from_placemarks(doc)
points = []
timestamp = extract_timestamp(placemark)
# Handle Point geometry
point_node = REXML::XPath.first(placemark, './/Point/coordinates')
if point_node
coords = parse_coordinates(point_node.text)
points << build_point(coords.first, timestamp, placemark) if coords.any?
REXML::XPath.each(doc, '//Placemark') do |placemark|
points.concat(parse_placemark(placemark))
end
points
end
# Handle LineString geometry (tracks/routes)
linestring_node = REXML::XPath.first(placemark, './/LineString/coordinates')
if linestring_node
coords = parse_coordinates(linestring_node.text)
coords.each do |coord|
points << build_point(coord, timestamp, placemark)
def extract_points_from_gx_tracks(doc)
points = []
REXML::XPath.each(doc, '//gx:Track') do |track|
points.concat(parse_gx_track(track))
end
points
end
# Returns the KML text for the import: reads the raw bytes, marks them as
# binary, and unpacks them first when they look like a KMZ (ZIP) archive.
def load_kml_content
  raw = ensure_binary_encoding(read_file_content)
  return raw unless kmz_file?(raw)

  extract_kml_from_kmz(raw)
end
# Reads the raw file bytes from file_path when that path is set and exists on
# disk; otherwise falls back to downloading the import's attached file.
def read_file_content
  return File.binread(file_path) if file_path && File.exist?(file_path)

  download_and_read_content
end
# Downloads the import's attached file through Imports::SecureFileDownloader.
# A StringIO result is read into a string; any other result is returned as-is.
def download_and_read_content
  payload = Imports::SecureFileDownloader.new(import.file).download_with_verification
  return payload.read if payload.is_a?(StringIO)

  payload
end
# Tags the content as BINARY in place (no bytes are changed) and returns the
# same object. Values that do not respond to force_encoding pass through
# untouched.
def ensure_binary_encoding(content)
  return content unless content.respond_to?(:force_encoding)

  content.force_encoding('BINARY')
end
# True when the content begins with "PK", the first two bytes of the ZIP
# signature that KMZ archives start with.
def kmz_file?(content)
  content.slice(0, 2) == 'PK'
end
# Returns the contents of the first .kml entry inside the KMZ archive.
#
# Raises RuntimeError when the archive holds no .kml entry, and wraps
# Zip::Error in a RuntimeError carrying the original message.
def extract_kml_from_kmz(kmz_content)
  find_kml_in_zip(kmz_content) || raise('No KML file found in KMZ archive')
rescue Zip::Error => e
  raise "Failed to extract KML from KMZ: #{e.message}"
end
# Streams through the ZIP entries of the in-memory archive and returns the
# contents of the first entry accepted by kml_entry?, or nil when none match.
def find_kml_in_zip(kmz_content)
  Zip::InputStream.open(StringIO.new(kmz_content)) do |stream|
    while (entry = stream.get_next_entry)
      # Stop at the first match; later .kml entries are ignored.
      return stream.read if kml_entry?(entry)
    end
  end

  nil
end
# True when the archive entry's name ends in ".kml", ignoring case.
def kml_entry?(entry)
  normalized_name = entry.name.downcase
  normalized_name.end_with?('.kml')
end
# Builds point attribute hashes for one Placemark. Placemarks without a
# TimeStamp/TimeSpan are skipped entirely (empty array). Otherwise the Point,
# LineString and MultiGeometry results are combined in that order and nil
# entries are removed.
def parse_placemark(placemark)
  return [] unless has_explicit_timestamp?(placemark)

  timestamp = extract_timestamp(placemark)
  from_point = extract_point_geometry(placemark, timestamp)
  from_linestring = extract_linestring_geometry(placemark, timestamp)
  from_multigeometry = extract_multigeometry(placemark, timestamp)

  (from_point + from_linestring + from_multigeometry).compact
end
# Returns a one-element array holding the point built from the first
# coordinate of the Placemark's first Point/coordinates node, or an empty
# array when there is no such node or it has no usable coordinates.
def extract_point_geometry(placemark, timestamp)
  node = REXML::XPath.first(placemark, './/Point/coordinates')
  return [] unless node

  coords = parse_coordinates(node.text)
  return [] unless coords.any?

  [build_point(coords.first, timestamp, placemark)]
end
# Builds one point per coordinate of the Placemark's first
# LineString/coordinates node; every point gets the same timestamp. Returns
# an empty array when the Placemark has no LineString.
def extract_linestring_geometry(placemark, timestamp)
  node = REXML::XPath.first(placemark, './/LineString/coordinates')
  return [] unless node

  parse_coordinates(node.text).map do |coord|
    build_point(coord, timestamp, placemark)
  end
end
# Builds one point per coordinate found in any coordinates node nested under
# a MultiGeometry element of the Placemark. Entries for which build_point
# returns nil are kept here; the caller compacts the combined result.
def extract_multigeometry(placemark, timestamp)
  points = []

  REXML::XPath.each(placemark, './/MultiGeometry//coordinates') do |coords_node|
    parse_coordinates(coords_node.text).each do |coord|
      points << build_point(coord, timestamp, placemark)
    end
  end

  points
end
# Converts a gx:Track element (Google Earth extension with paired when /
# gx:coord children) into point attribute hashes by extracting both lists and
# pairing them up in build_gx_track_points.
def parse_gx_track(track)
  timestamps = extract_gx_timestamps(track)
  coordinates = extract_gx_coordinates(track)

  build_gx_track_points(timestamps, coordinates)
end
# Returns the whitespace-stripped text of every `when` element under the
# track, in document order.
def extract_gx_timestamps(track)
  REXML::XPath.match(track, './/when').map { |when_node| when_node.text.strip }
end
# Returns the whitespace-stripped text of every `gx:coord` element under the
# track, in document order.
def extract_gx_coordinates(track)
  REXML::XPath.match(track, './/gx:coord').map { |coord_node| coord_node.text.strip }
end
# Match timestamps with coordinates
[timestamps.size, coordinates.size].min.times do |i|
begin
time = Time.parse(timestamps[i]).to_i
coord_parts = coordinates[i].split(/\s+/)
next if coord_parts.size < 2
def build_gx_track_points(timestamps, coordinates)
points = []
min_size = [timestamps.size, coordinates.size].min
lng, lat, alt = coord_parts.map(&:to_f)
points << {
lonlat: "POINT(#{lng} #{lat})",
altitude: alt&.to_i || 0,
timestamp: time,
import_id: import.id,
velocity: 0.0,
raw_data: { source: 'gx_track', index: i },
user_id: user_id,
created_at: Time.current,
updated_at: Time.current
}
rescue StandardError => e
Rails.logger.warn("Failed to parse gx:Track point at index #{i}: #{e.message}")
next
end
min_size.times do |i|
point = build_gx_track_point(timestamps[i], coordinates[i], i)
points << point if point
end
points
end
# Builds the attribute hash for one gx:Track sample.
#
# timestamp_str - time string understood by Time.parse
# coord_str     - whitespace-separated "lng lat [alt]"
# index         - position of the sample in the track (stored in raw_data)
#
# Returns nil when fewer than two coordinate components are present. Any
# StandardError raised while parsing is logged as a warning and also yields
# nil, so one bad sample does not abort the whole track.
def build_gx_track_point(timestamp_str, coord_str, index)
  epoch_seconds = Time.parse(timestamp_str).to_i
  components = coord_str.split(/\s+/)
  return nil unless components.size >= 2

  longitude, latitude, elevation = components.map(&:to_f)

  {
    lonlat: "POINT(#{longitude} #{latitude})",
    # Altitude is optional in gx:coord; default to 0 when it is absent.
    altitude: elevation ? elevation.to_i : 0,
    timestamp: epoch_seconds,
    import_id: import.id,
    velocity: 0.0,
    raw_data: { source: 'gx_track', index: index },
    user_id: user_id,
    created_at: Time.current,
    updated_at: Time.current
  }
rescue StandardError => e
  Rails.logger.warn("Failed to parse gx:Track point at index #{index}: #{e.message}")
  nil
end
def parse_coordinates(coord_text)
# KML coordinates format: "longitude,latitude[,altitude] ..."
# Multiple coordinates separated by whitespace
return [] if coord_text.blank?
coord_text.strip.split(/\s+/).map do |coord_str|
parts = coord_str.split(',')
next if parts.size < 2
coord_text.strip.split(/\s+/).map { |coord_str| parse_single_coordinate(coord_str) }.compact
end
{
lng: parts[0].to_f,
lat: parts[1].to_f,
alt: parts[2]&.to_f || 0.0
}
end.compact
# Parses one KML coordinate tuple ("lng,lat[,alt]") into a hash of floats.
# Altitude defaults to 0.0 when omitted. Returns nil when the tuple has fewer
# than two comma-separated parts.
def parse_single_coordinate(coord_str)
  lng, lat, alt = coord_str.split(',')
  return nil if lat.nil?

  {
    lng: lng.to_f,
    lat: lat.to_f,
    alt: alt ? alt.to_f : 0.0
  }
end
# True when find_timestamp_node locates a timestamp node for the Placemark.
def has_explicit_timestamp?(placemark)
  timestamp_node = find_timestamp_node(placemark)
  timestamp_node.present?
end
# Returns the Placemark's timestamp as integer epoch seconds, taken from the
# node located by find_timestamp_node.
#
# Raises RuntimeError when the Placemark has no timestamp node, and re-raises
# any parsing error after logging it, so a point is never stored with a
# made-up time.
def extract_timestamp(placemark)
  node = find_timestamp_node(placemark)
  raise 'No timestamp found in placemark' unless node

  Time.parse(node.text).to_i
rescue StandardError => e
  Rails.logger.error("Failed to parse timestamp: #{e.message}")
  raise
end
# Returns the first timestamp node found for the Placemark, trying
# TimeStamp/when, then TimeSpan/begin, then TimeSpan/end. Returns nil when
# none of them is present.
def find_timestamp_node(placemark)
  ['.//TimeStamp/when', './/TimeSpan/begin', './/TimeSpan/end'].each do |path|
    node = REXML::XPath.first(placemark, path)
    return node if node
  end

  nil
end
def build_point(coord, timestamp, placemark)
return if coord[:lat].blank? || coord[:lng].blank?
return if invalid_coordinates?(coord)
{
lonlat: "POINT(#{coord[:lng]} #{coord[:lat]})",
lonlat: format_point_geometry(coord),
altitude: coord[:alt].to_i,
timestamp: timestamp,
import_id: import.id,
@ -169,31 +267,52 @@ class Kml::Importer
}
end
# True when the coordinate hash has a blank :lat or a blank :lng.
def invalid_coordinates?(coord)
  %i[lat lng].any? { |key| coord[key].blank? }
end
# Renders the coordinate hash as a "POINT(lng lat)" string (longitude first).
def format_point_geometry(coord)
  longitude = coord[:lng]
  latitude = coord[:lat]

  "POINT(#{longitude} #{latitude})"
end
# Returns the Placemark's speed from its ExtendedData, rounded to one decimal
# place. Falls back to 0.0 when no speed node exists or when reading it
# raises a StandardError (velocity is treated as optional).
def extract_velocity(placemark)
  speed_node = find_speed_node(placemark)
  speed_node ? speed_node.text.to_f.round(1) : 0.0
rescue StandardError
  0.0
end
# Returns the value node of the first ExtendedData Data element named
# 'speed', 'Speed' or 'velocity' (tried in that order), or nil when the
# Placemark has none of them.
def find_speed_node(placemark)
  [
    ".//Data[@name='speed']/value",
    ".//Data[@name='Speed']/value",
    ".//Data[@name='velocity']/value"
  ].each do |path|
    node = REXML::XPath.first(placemark, path)
    return node if node
  end

  nil
end
# Merges the Placemark's name/description with its custom ExtendedData
# fields into one hash; custom fields win on key collisions. Any
# StandardError is logged as a warning and results in an empty hash.
def extract_extended_data(placemark)
  {}
    .merge(extract_name_and_description(placemark))
    .merge(extract_custom_data_fields(placemark))
rescue StandardError => e
  Rails.logger.warn("Failed to extract extended data: #{e.message}")
  {}
end
# Returns a hash with the stripped text of the Placemark's first `name` and
# `description` elements under the keys 'name' and 'description'. A key is
# omitted when its element is absent.
def extract_name_and_description(placemark)
  { 'name' => './/name', 'description' => './/description' }.each_with_object({}) do |(key, path), data|
    node = REXML::XPath.first(placemark, path)
    data[key] = node.text.strip if node
  end
end
def extract_custom_data_fields(placemark)
data = {}
REXML::XPath.each(placemark, './/ExtendedData/Data') do |data_node|
name = data_node.attributes['name']
value_node = REXML::XPath.first(data_node, './value')
@ -201,26 +320,29 @@ class Kml::Importer
end
data
rescue StandardError => e
Rails.logger.warn("Failed to extract extended data: #{e.message}")
{}
end
# Persists one batch of point attribute hashes: deduplicates it, upserts the
# unique records, then broadcasts progress with the number of unique records.
# Any StandardError is turned into a user notification rather than raised.
def bulk_insert_points(batch)
  unique_batch = deduplicate_batch(batch)
  upsert_points(unique_batch)
  broadcast_import_progress(import, unique_batch.size)
rescue StandardError => e
  create_notification("Failed to process KML file: #{e.message}")
end
# Drops records that repeat an earlier record's lonlat/timestamp/user_id
# combination (the same columns the upsert uses as its unique key); the first
# occurrence is kept.
def deduplicate_batch(batch)
  batch.uniq { |record| record.values_at(:lonlat, :timestamp, :user_id) }
end
# Writes the batch with a single Point.upsert_all call, skipping rows that
# collide on lonlat/timestamp/user_id. Model validations are bypassed (hence
# the rubocop directive). Errors propagate to the caller, which reports them.
def upsert_points(batch)
  # rubocop:disable Rails/SkipsModelValidations
  Point.upsert_all(
    batch,
    unique_by: %i[lonlat timestamp user_id],
    returning: false,
    on_duplicate: :skip
  )
  # rubocop:enable Rails/SkipsModelValidations
end
def create_notification(message)

Binary file not shown.

View file

@ -142,6 +142,31 @@ RSpec.describe Kml::Importer do
end
end
# Exercises the importer against a .kmz fixture instead of a plain .kml file.
context 'when importing KMZ file (compressed KML)' do
  let(:file_path) { Rails.root.join('spec/fixtures/files/kml/points_with_timestamps.kmz').to_s }

  it 'extracts and processes KML from KMZ archive' do
    expect { parser }.to change { Point.count }.by(3)
  end

  it 'creates points with correct data from extracted KML' do
    parser

    earliest_point = user.points.order(:timestamp).first

    expect(earliest_point.lat).to eq(37.4220)
    expect(earliest_point.lon).to eq(-122.0841)
    expect(earliest_point.altitude).to eq(10)
    expect(earliest_point.timestamp).to eq(Time.zone.parse('2024-01-15T12:00:00Z').to_i)
  end

  it 'broadcasts importing progress' do
    expect_any_instance_of(Imports::Broadcaster).to receive(:broadcast_import_progress).at_least(:once)

    parser
  end
end
context 'when import fails' do
let(:file_path) { Rails.root.join('spec/fixtures/files/kml/points_with_timestamps.kml').to_s }