Refactor KML importer to improve readability and maintainability

This commit is contained in:
Eugene Burmakin 2025-12-09 19:36:34 +01:00
parent 389198da73
commit 68bde7e32f
3 changed files with 213 additions and 171 deletions

View file

@ -2,8 +2,6 @@
[![Discord](https://dcbadge.limes.pink/api/server/pHsBjpt5J8)](https://discord.gg/pHsBjpt5J8) | [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/H2H3IDYDD) | [![Patreon](https://img.shields.io/endpoint.svg?url=https%3A%2F%2Fshieldsio-patreon.vercel.app%2Fapi%3Fusername%3Dfreika%26type%3Dpatrons&style=for-the-badge)](https://www.patreon.com/freika)
[![CircleCI](https://circleci.com/gh/Freika/dawarich.svg?style=svg)](https://app.circleci.com/pipelines/github/Freika/dawarich)
---
## 📸 Screenshots

View file

@ -34,7 +34,7 @@ export default class extends BaseController {
const statusCell = row.querySelector('[data-status-display]');
if (statusCell && data.import.status) {
statusCell.innerHTML = this.renderStatusBadge(data.import.status);
statusCell.textContent = data.import.status;
}
}
}
@ -47,32 +47,4 @@ export default class extends BaseController {
this.channel.unsubscribe();
}
}
renderStatusBadge(status) {
const statusLower = status.toLowerCase();
switch(statusLower) {
case 'completed':
return `<span class="badge badge-success badge-sm gap-1">
<span class="text-xs"></span>
<span>Completed</span>
</span>`;
case 'processing':
return `<span class="badge badge-warning badge-sm gap-1">
<span class="loading loading-spinner loading-xs"></span>
<span>Processing</span>
</span>`;
case 'failed':
return `<span class="badge badge-error badge-sm gap-1">
<span class="text-xs"></span>
<span>Failed</span>
</span>`;
default:
return `<span class="badge badge-sm">${this.capitalize(status)}</span>`;
}
}
capitalize(str) {
return str.charAt(0).toUpperCase() + str.slice(1);
}
}

View file

@ -16,69 +16,80 @@ class Kml::Importer
end
def call
file_content = load_kml_content
doc = REXML::Document.new(file_content)
points_data = []
# Process all Placemarks which can contain various geometry types
REXML::XPath.each(doc, '//Placemark') do |placemark|
points_data.concat(parse_placemark(placemark))
end
# Process gx:Track elements (Google Earth extensions for GPS tracks)
REXML::XPath.each(doc, '//gx:Track') do |track|
points_data.concat(parse_gx_track(track))
end
points_data.compact!
doc = load_and_parse_kml_document
points_data = extract_all_points(doc)
return if points_data.empty?
# Process in batches to avoid memory issues with large files
save_points_in_batches(points_data)
end
private
def load_and_parse_kml_document
file_content = load_kml_content
REXML::Document.new(file_content)
end
def extract_all_points(doc)
points_data = []
points_data.concat(extract_points_from_placemarks(doc))
points_data.concat(extract_points_from_gx_tracks(doc))
points_data.compact
end
def save_points_in_batches(points_data)
points_data.each_slice(1000) do |batch|
bulk_insert_points(batch)
end
end
private
def extract_points_from_placemarks(doc)
points = []
REXML::XPath.each(doc, '//Placemark') do |placemark|
points.concat(parse_placemark(placemark))
end
points
end
def extract_points_from_gx_tracks(doc)
points = []
REXML::XPath.each(doc, '//gx:Track') do |track|
points.concat(parse_gx_track(track))
end
points
end
def load_kml_content
# Read content in binary mode for ZIP detection
content = if file_path && File.exist?(file_path)
File.binread(file_path)
else
downloader_content = Imports::SecureFileDownloader.new(import.file).download_with_verification
# Convert StringIO to String if needed
downloader_content.is_a?(StringIO) ? downloader_content.read : downloader_content
end
content = read_file_content
content = ensure_binary_encoding(content)
kmz_file?(content) ? extract_kml_from_kmz(content) : content
end
# Ensure we have a binary string
content.force_encoding('BINARY') if content.respond_to?(:force_encoding)
# Check if this is a KMZ file (ZIP archive) by checking for ZIP signature
# ZIP files start with "PK" (0x50 0x4B)
if content[0..1] == 'PK'
extract_kml_from_kmz(content)
def read_file_content
if file_path && File.exist?(file_path)
File.binread(file_path)
else
content
download_and_read_content
end
end
def download_and_read_content
downloader_content = Imports::SecureFileDownloader.new(import.file).download_with_verification
downloader_content.is_a?(StringIO) ? downloader_content.read : downloader_content
end
def ensure_binary_encoding(content)
content.force_encoding('BINARY') if content.respond_to?(:force_encoding)
content
end
def kmz_file?(content)
content[0..1] == 'PK'
end
def extract_kml_from_kmz(kmz_content)
# KMZ files are ZIP archives containing a KML file (usually doc.kml)
# We need to extract the KML content from the ZIP
kml_content = nil
Zip::InputStream.open(StringIO.new(kmz_content)) do |io|
while (entry = io.get_next_entry)
if entry.name.downcase.end_with?('.kml')
kml_content = io.read
break
end
end
end
kml_content = find_kml_in_zip(kmz_content)
raise 'No KML file found in KMZ archive' unless kml_content
kml_content
@ -86,128 +97,165 @@ class Kml::Importer
raise "Failed to extract KML from KMZ: #{e.message}"
end
def parse_placemark(placemark)
points = []
def find_kml_in_zip(kmz_content)
kml_content = nil
return points unless has_explicit_timestamp?(placemark)
timestamp = extract_timestamp(placemark)
# Handle Point geometry
point_node = REXML::XPath.first(placemark, './/Point/coordinates')
if point_node
coords = parse_coordinates(point_node.text)
points << build_point(coords.first, timestamp, placemark) if coords.any?
end
# Handle LineString geometry (tracks/routes)
linestring_node = REXML::XPath.first(placemark, './/LineString/coordinates')
if linestring_node
coords = parse_coordinates(linestring_node.text)
coords.each do |coord|
points << build_point(coord, timestamp, placemark)
Zip::InputStream.open(StringIO.new(kmz_content)) do |io|
while (entry = io.get_next_entry)
if kml_entry?(entry)
kml_content = io.read
break
end
end
end
# Handle MultiGeometry (can contain multiple Points, LineStrings, etc.)
kml_content
end
def kml_entry?(entry)
entry.name.downcase.end_with?('.kml')
end
def parse_placemark(placemark)
return [] unless has_explicit_timestamp?(placemark)
timestamp = extract_timestamp(placemark)
points = []
points.concat(extract_point_geometry(placemark, timestamp))
points.concat(extract_linestring_geometry(placemark, timestamp))
points.concat(extract_multigeometry(placemark, timestamp))
points.compact
end
def extract_point_geometry(placemark, timestamp)
point_node = REXML::XPath.first(placemark, './/Point/coordinates')
return [] unless point_node
coords = parse_coordinates(point_node.text)
coords.any? ? [build_point(coords.first, timestamp, placemark)] : []
end
def extract_linestring_geometry(placemark, timestamp)
linestring_node = REXML::XPath.first(placemark, './/LineString/coordinates')
return [] unless linestring_node
coords = parse_coordinates(linestring_node.text)
coords.map { |coord| build_point(coord, timestamp, placemark) }
end
def extract_multigeometry(placemark, timestamp)
points = []
REXML::XPath.each(placemark, './/MultiGeometry//coordinates') do |coords_node|
coords = parse_coordinates(coords_node.text)
coords.each do |coord|
points << build_point(coord, timestamp, placemark)
end
end
points.compact
points
end
def parse_gx_track(track)
points = []
timestamps = extract_gx_timestamps(track)
coordinates = extract_gx_coordinates(track)
build_gx_track_points(timestamps, coordinates)
end
def extract_gx_timestamps(track)
timestamps = []
REXML::XPath.each(track, './/when') do |when_node|
timestamps << when_node.text.strip
end
timestamps
end
def extract_gx_coordinates(track)
coordinates = []
REXML::XPath.each(track, './/gx:coord') do |coord_node|
coordinates << coord_node.text.strip
end
coordinates
end
# Match timestamps with coordinates
[timestamps.size, coordinates.size].min.times do |i|
time = Time.parse(timestamps[i]).to_i
coord_parts = coordinates[i].split(/\s+/)
next if coord_parts.size < 2
def build_gx_track_points(timestamps, coordinates)
points = []
min_size = [timestamps.size, coordinates.size].min
lng, lat, alt = coord_parts.map(&:to_f)
points << {
lonlat: "POINT(#{lng} #{lat})",
altitude: alt&.to_i || 0,
timestamp: time,
import_id: import.id,
velocity: 0.0,
raw_data: { source: 'gx_track', index: i },
user_id: user_id,
created_at: Time.current,
updated_at: Time.current
}
rescue StandardError => e
Rails.logger.warn("Failed to parse gx:Track point at index #{i}: #{e.message}")
next
min_size.times do |i|
point = build_gx_track_point(timestamps[i], coordinates[i], i)
points << point if point
end
points
end
def build_gx_track_point(timestamp_str, coord_str, index)
time = Time.parse(timestamp_str).to_i
coord_parts = coord_str.split(/\s+/)
return nil if coord_parts.size < 2
lng, lat, alt = coord_parts.map(&:to_f)
{
lonlat: "POINT(#{lng} #{lat})",
altitude: alt&.to_i || 0,
timestamp: time,
import_id: import.id,
velocity: 0.0,
raw_data: { source: 'gx_track', index: index },
user_id: user_id,
created_at: Time.current,
updated_at: Time.current
}
rescue StandardError => e
Rails.logger.warn("Failed to parse gx:Track point at index #{index}: #{e.message}")
nil
end
def parse_coordinates(coord_text)
# KML coordinates format: "longitude,latitude[,altitude] ..."
# Multiple coordinates separated by whitespace
return [] if coord_text.blank?
coord_text.strip.split(/\s+/).map do |coord_str|
parts = coord_str.split(',')
next if parts.size < 2
coord_text.strip.split(/\s+/).map { |coord_str| parse_single_coordinate(coord_str) }.compact
end
{
lng: parts[0].to_f,
lat: parts[1].to_f,
alt: parts[2]&.to_f || 0.0
}
end.compact
def parse_single_coordinate(coord_str)
parts = coord_str.split(',')
return nil if parts.size < 2
{
lng: parts[0].to_f,
lat: parts[1].to_f,
alt: parts[2]&.to_f || 0.0
}
end
def has_explicit_timestamp?(placemark)
REXML::XPath.first(placemark, './/TimeStamp/when') ||
REXML::XPath.first(placemark, './/TimeSpan/begin') ||
REXML::XPath.first(placemark, './/TimeSpan/end')
find_timestamp_node(placemark).present?
end
def extract_timestamp(placemark)
# Try TimeStamp first
timestamp_node = REXML::XPath.first(placemark, './/TimeStamp/when')
return Time.parse(timestamp_node.text).to_i if timestamp_node
node = find_timestamp_node(placemark)
raise 'No timestamp found in placemark' unless node
# Try TimeSpan begin
timespan_begin = REXML::XPath.first(placemark, './/TimeSpan/begin')
return Time.parse(timespan_begin.text).to_i if timespan_begin
# Try TimeSpan end as fallback
timespan_end = REXML::XPath.first(placemark, './/TimeSpan/end')
return Time.parse(timespan_end.text).to_i if timespan_end
# No timestamp found - this should not happen if has_explicit_timestamp? was checked
raise 'No timestamp found in placemark'
Time.parse(node.text).to_i
rescue StandardError => e
Rails.logger.error("Failed to parse timestamp: #{e.message}")
raise e
end
def find_timestamp_node(placemark)
REXML::XPath.first(placemark, './/TimeStamp/when') ||
REXML::XPath.first(placemark, './/TimeSpan/begin') ||
REXML::XPath.first(placemark, './/TimeSpan/end')
end
def build_point(coord, timestamp, placemark)
return if coord[:lat].blank? || coord[:lng].blank?
return if invalid_coordinates?(coord)
{
lonlat: "POINT(#{coord[:lng]} #{coord[:lat]})",
lonlat: format_point_geometry(coord),
altitude: coord[:alt].to_i,
timestamp: timestamp,
import_id: import.id,
@ -219,31 +267,52 @@ class Kml::Importer
}
end
def invalid_coordinates?(coord)
coord[:lat].blank? || coord[:lng].blank?
end
def format_point_geometry(coord)
"POINT(#{coord[:lng]} #{coord[:lat]})"
end
def extract_velocity(placemark)
# Try to extract speed from ExtendedData
speed_node = REXML::XPath.first(placemark, ".//Data[@name='speed']/value") ||
REXML::XPath.first(placemark, ".//Data[@name='Speed']/value") ||
REXML::XPath.first(placemark, ".//Data[@name='velocity']/value")
return speed_node.text.to_f.round(1) if speed_node
0.0
speed_node = find_speed_node(placemark)
speed_node ? speed_node.text.to_f.round(1) : 0.0
rescue StandardError
0.0
end
def find_speed_node(placemark)
REXML::XPath.first(placemark, ".//Data[@name='speed']/value") ||
REXML::XPath.first(placemark, ".//Data[@name='Speed']/value") ||
REXML::XPath.first(placemark, ".//Data[@name='velocity']/value")
end
def extract_extended_data(placemark)
data = {}
data.merge!(extract_name_and_description(placemark))
data.merge!(extract_custom_data_fields(placemark))
data
rescue StandardError => e
Rails.logger.warn("Failed to extract extended data: #{e.message}")
{}
end
def extract_name_and_description(placemark)
data = {}
# Extract name if present
name_node = REXML::XPath.first(placemark, './/name')
data['name'] = name_node.text.strip if name_node
# Extract description if present
desc_node = REXML::XPath.first(placemark, './/description')
data['description'] = desc_node.text.strip if desc_node
# Extract all ExtendedData/Data elements
data
end
def extract_custom_data_fields(placemark)
data = {}
REXML::XPath.each(placemark, './/ExtendedData/Data') do |data_node|
name = data_node.attributes['name']
value_node = REXML::XPath.first(data_node, './value')
@ -251,26 +320,29 @@ class Kml::Importer
end
data
rescue StandardError => e
Rails.logger.warn("Failed to extract extended data: #{e.message}")
{}
end
def bulk_insert_points(batch)
unique_batch = batch.uniq { |record| [record[:lonlat], record[:timestamp], record[:user_id]] }
unique_batch = deduplicate_batch(batch)
upsert_points(unique_batch)
broadcast_import_progress(import, unique_batch.size)
rescue StandardError => e
create_notification("Failed to process KML file: #{e.message}")
end
def deduplicate_batch(batch)
batch.uniq { |record| [record[:lonlat], record[:timestamp], record[:user_id]] }
end
def upsert_points(batch)
# rubocop:disable Rails/SkipsModelValidations
Point.upsert_all(
unique_batch,
batch,
unique_by: %i[lonlat timestamp user_id],
returning: false,
on_duplicate: :skip
)
# rubocop:enable Rails/SkipsModelValidations
broadcast_import_progress(import, unique_batch.size)
rescue StandardError => e
create_notification("Failed to process KML file: #{e.message}")
end
def create_notification(message)