From 68bde7e32f5397d9880f6b53e54ba32ff6aee47a Mon Sep 17 00:00:00 2001 From: Eugene Burmakin Date: Tue, 9 Dec 2025 19:36:34 +0100 Subject: [PATCH] Refactor KML importer to improve readability and maintainability --- README.md | 2 - .../controllers/imports_controller.js | 30 +- app/services/kml/importer.rb | 352 +++++++++++------- 3 files changed, 213 insertions(+), 171 deletions(-) diff --git a/README.md b/README.md index e61d84ae..7257d484 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,6 @@ [![Discord](https://dcbadge.limes.pink/api/server/pHsBjpt5J8)](https://discord.gg/pHsBjpt5J8) | [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/H2H3IDYDD) | [![Patreon](https://img.shields.io/endpoint.svg?url=https%3A%2F%2Fshieldsio-patreon.vercel.app%2Fapi%3Fusername%3Dfreika%26type%3Dpatrons&style=for-the-badge)](https://www.patreon.com/freika) -[![CircleCI](https://circleci.com/gh/Freika/dawarich.svg?style=svg)](https://app.circleci.com/pipelines/github/Freika/dawarich) - --- ## 📸 Screenshots diff --git a/app/javascript/controllers/imports_controller.js b/app/javascript/controllers/imports_controller.js index 1ac6d1c2..4a864074 100644 --- a/app/javascript/controllers/imports_controller.js +++ b/app/javascript/controllers/imports_controller.js @@ -34,7 +34,7 @@ export default class extends BaseController { const statusCell = row.querySelector('[data-status-display]'); if (statusCell && data.import.status) { - statusCell.innerHTML = this.renderStatusBadge(data.import.status); + statusCell.textContent = data.import.status; } } } @@ -47,32 +47,4 @@ export default class extends BaseController { this.channel.unsubscribe(); } } - - renderStatusBadge(status) { - const statusLower = status.toLowerCase(); - - switch(statusLower) { - case 'completed': - return ` - ✓ - Completed - `; - case 'processing': - return ` - - Processing - `; - case 'failed': - return ` - ✕ - Failed - `; - default: - return `${this.capitalize(status)}`; - } - } - - capitalize(str) { - return str.charAt(0).toUpperCase() + str.slice(1); - } } diff --git a/app/services/kml/importer.rb b/app/services/kml/importer.rb index 6b96d8ff..39df1683 100644 --- a/app/services/kml/importer.rb +++ b/app/services/kml/importer.rb @@ -16,69 +16,80 @@ class Kml::Importer end def call - file_content = load_kml_content - doc = REXML::Document.new(file_content) - - points_data = [] - - # Process all Placemarks which can contain various geometry types - REXML::XPath.each(doc, '//Placemark') do |placemark| - points_data.concat(parse_placemark(placemark)) - end - - # Process gx:Track elements (Google Earth extensions for GPS tracks) - REXML::XPath.each(doc, '//gx:Track') do |track| - points_data.concat(parse_gx_track(track)) - end - - points_data.compact! + doc = load_and_parse_kml_document + points_data = extract_all_points(doc) return if points_data.empty? - # Process in batches to avoid memory issues with large files + save_points_in_batches(points_data) + end + + private + + def load_and_parse_kml_document + file_content = load_kml_content + REXML::Document.new(file_content) + end + + def extract_all_points(doc) + points_data = [] + points_data.concat(extract_points_from_placemarks(doc)) + points_data.concat(extract_points_from_gx_tracks(doc)) + points_data.compact + end + + def save_points_in_batches(points_data) points_data.each_slice(1000) do |batch| bulk_insert_points(batch) end end - private + def extract_points_from_placemarks(doc) + points = [] + REXML::XPath.each(doc, '//Placemark') do |placemark| + points.concat(parse_placemark(placemark)) + end + points + end + + def extract_points_from_gx_tracks(doc) + points = [] + REXML::XPath.each(doc, '//gx:Track') do |track| + points.concat(parse_gx_track(track)) + end + points + end def load_kml_content - # Read content in binary mode for ZIP detection - content = if file_path && File.exist?(file_path) - File.binread(file_path) - else - downloader_content = Imports::SecureFileDownloader.new(import.file).download_with_verification - # Convert StringIO to String if needed - downloader_content.is_a?(StringIO) ? downloader_content.read : downloader_content - end + content = read_file_content + content = ensure_binary_encoding(content) + kmz_file?(content) ? extract_kml_from_kmz(content) : content + end - # Ensure we have a binary string - content.force_encoding('BINARY') if content.respond_to?(:force_encoding) - - # Check if this is a KMZ file (ZIP archive) by checking for ZIP signature - # ZIP files start with "PK" (0x50 0x4B) - if content[0..1] == 'PK' - extract_kml_from_kmz(content) + def read_file_content + if file_path && File.exist?(file_path) + File.binread(file_path) else - content + download_and_read_content end end + def download_and_read_content + downloader_content = Imports::SecureFileDownloader.new(import.file).download_with_verification + downloader_content.is_a?(StringIO) ? downloader_content.read : downloader_content + end + + def ensure_binary_encoding(content) + content.force_encoding('BINARY') if content.respond_to?(:force_encoding) + content + end + + def kmz_file?(content) + content[0..1] == 'PK' + end + def extract_kml_from_kmz(kmz_content) - # KMZ files are ZIP archives containing a KML file (usually doc.kml) - # We need to extract the KML content from the ZIP - kml_content = nil - - Zip::InputStream.open(StringIO.new(kmz_content)) do |io| - while (entry = io.get_next_entry) - if entry.name.downcase.end_with?('.kml') - kml_content = io.read - break - end - end - end - + kml_content = find_kml_in_zip(kmz_content) raise 'No KML file found in KMZ archive' unless kml_content kml_content @@ -86,128 +97,165 @@ class Kml::Importer raise "Failed to extract KML from KMZ: #{e.message}" end - def parse_placemark(placemark) - points = [] + def find_kml_in_zip(kmz_content) + kml_content = nil - return points unless has_explicit_timestamp?(placemark) - - timestamp = extract_timestamp(placemark) - - # Handle Point geometry - point_node = REXML::XPath.first(placemark, './/Point/coordinates') - if point_node - coords = parse_coordinates(point_node.text) - points << build_point(coords.first, timestamp, placemark) if coords.any? - end - - # Handle LineString geometry (tracks/routes) - linestring_node = REXML::XPath.first(placemark, './/LineString/coordinates') - if linestring_node - coords = parse_coordinates(linestring_node.text) - coords.each do |coord| - points << build_point(coord, timestamp, placemark) + Zip::InputStream.open(StringIO.new(kmz_content)) do |io| + while (entry = io.get_next_entry) + if kml_entry?(entry) + kml_content = io.read + break + end end end - # Handle MultiGeometry (can contain multiple Points, LineStrings, etc.) + kml_content + end + + def kml_entry?(entry) + entry.name.downcase.end_with?('.kml') + end + + def parse_placemark(placemark) + return [] unless has_explicit_timestamp?(placemark) + + timestamp = extract_timestamp(placemark) + points = [] + + points.concat(extract_point_geometry(placemark, timestamp)) + points.concat(extract_linestring_geometry(placemark, timestamp)) + points.concat(extract_multigeometry(placemark, timestamp)) + + points.compact + end + + def extract_point_geometry(placemark, timestamp) + point_node = REXML::XPath.first(placemark, './/Point/coordinates') + return [] unless point_node + + coords = parse_coordinates(point_node.text) + coords.any? ? [build_point(coords.first, timestamp, placemark)] : [] + end + + def extract_linestring_geometry(placemark, timestamp) + linestring_node = REXML::XPath.first(placemark, './/LineString/coordinates') + return [] unless linestring_node + + coords = parse_coordinates(linestring_node.text) + coords.map { |coord| build_point(coord, timestamp, placemark) } + end + + def extract_multigeometry(placemark, timestamp) + points = [] REXML::XPath.each(placemark, './/MultiGeometry//coordinates') do |coords_node| coords = parse_coordinates(coords_node.text) coords.each do |coord| points << build_point(coord, timestamp, placemark) end end - - points.compact + points end def parse_gx_track(track) - points = [] + timestamps = extract_gx_timestamps(track) + coordinates = extract_gx_coordinates(track) + build_gx_track_points(timestamps, coordinates) + end + + def extract_gx_timestamps(track) timestamps = [] REXML::XPath.each(track, './/when') do |when_node| timestamps << when_node.text.strip end + timestamps + end + def extract_gx_coordinates(track) coordinates = [] REXML::XPath.each(track, './/gx:coord') do |coord_node| coordinates << coord_node.text.strip end + coordinates + end - # Match timestamps with coordinates - [timestamps.size, coordinates.size].min.times do |i| - time = Time.parse(timestamps[i]).to_i - coord_parts = coordinates[i].split(/\s+/) - next if coord_parts.size < 2 + def build_gx_track_points(timestamps, coordinates) + points = [] + min_size = [timestamps.size, coordinates.size].min - lng, lat, alt = coord_parts.map(&:to_f) - - points << { - lonlat: "POINT(#{lng} #{lat})", - altitude: alt&.to_i || 0, - timestamp: time, - import_id: import.id, - velocity: 0.0, - raw_data: { source: 'gx_track', index: i }, - user_id: user_id, - created_at: Time.current, - updated_at: Time.current - } - rescue StandardError => e - Rails.logger.warn("Failed to parse gx:Track point at index #{i}: #{e.message}") - next + min_size.times do |i| + point = build_gx_track_point(timestamps[i], coordinates[i], i) + points << point if point end points end + def build_gx_track_point(timestamp_str, coord_str, index) + time = Time.parse(timestamp_str).to_i + coord_parts = coord_str.split(/\s+/) + return nil if coord_parts.size < 2 + + lng, lat, alt = coord_parts.map(&:to_f) + + { + lonlat: "POINT(#{lng} #{lat})", + altitude: alt&.to_i || 0, + timestamp: time, + import_id: import.id, + velocity: 0.0, + raw_data: { source: 'gx_track', index: index }, + user_id: user_id, + created_at: Time.current, + updated_at: Time.current + } + rescue StandardError => e + Rails.logger.warn("Failed to parse gx:Track point at index #{index}: #{e.message}") + nil + end + def parse_coordinates(coord_text) - # KML coordinates format: "longitude,latitude[,altitude] ..." - # Multiple coordinates separated by whitespace return [] if coord_text.blank? - coord_text.strip.split(/\s+/).map do |coord_str| - parts = coord_str.split(',') - next if parts.size < 2 + coord_text.strip.split(/\s+/).map { |coord_str| parse_single_coordinate(coord_str) }.compact + end - { - lng: parts[0].to_f, - lat: parts[1].to_f, - alt: parts[2]&.to_f || 0.0 - } - end.compact + def parse_single_coordinate(coord_str) + parts = coord_str.split(',') + return nil if parts.size < 2 + + { + lng: parts[0].to_f, + lat: parts[1].to_f, + alt: parts[2]&.to_f || 0.0 + } end def has_explicit_timestamp?(placemark) - REXML::XPath.first(placemark, './/TimeStamp/when') || - REXML::XPath.first(placemark, './/TimeSpan/begin') || - REXML::XPath.first(placemark, './/TimeSpan/end') + find_timestamp_node(placemark).present? end def extract_timestamp(placemark) - # Try TimeStamp first - timestamp_node = REXML::XPath.first(placemark, './/TimeStamp/when') - return Time.parse(timestamp_node.text).to_i if timestamp_node + node = find_timestamp_node(placemark) + raise 'No timestamp found in placemark' unless node - # Try TimeSpan begin - timespan_begin = REXML::XPath.first(placemark, './/TimeSpan/begin') - return Time.parse(timespan_begin.text).to_i if timespan_begin - - # Try TimeSpan end as fallback - timespan_end = REXML::XPath.first(placemark, './/TimeSpan/end') - return Time.parse(timespan_end.text).to_i if timespan_end - - # No timestamp found - this should not happen if has_explicit_timestamp? was checked - raise 'No timestamp found in placemark' + Time.parse(node.text).to_i rescue StandardError => e Rails.logger.error("Failed to parse timestamp: #{e.message}") raise e end + def find_timestamp_node(placemark) + REXML::XPath.first(placemark, './/TimeStamp/when') || + REXML::XPath.first(placemark, './/TimeSpan/begin') || + REXML::XPath.first(placemark, './/TimeSpan/end') + end + def build_point(coord, timestamp, placemark) - return if coord[:lat].blank? || coord[:lng].blank? + return if invalid_coordinates?(coord) { - lonlat: "POINT(#{coord[:lng]} #{coord[:lat]})", + lonlat: format_point_geometry(coord), altitude: coord[:alt].to_i, timestamp: timestamp, import_id: import.id, @@ -219,31 +267,52 @@ class Kml::Importer } end + def invalid_coordinates?(coord) + coord[:lat].blank? || coord[:lng].blank? + end + + def format_point_geometry(coord) + "POINT(#{coord[:lng]} #{coord[:lat]})" + end + def extract_velocity(placemark) - # Try to extract speed from ExtendedData - speed_node = REXML::XPath.first(placemark, ".//Data[@name='speed']/value") || - REXML::XPath.first(placemark, ".//Data[@name='Speed']/value") || - REXML::XPath.first(placemark, ".//Data[@name='velocity']/value") - - return speed_node.text.to_f.round(1) if speed_node - - 0.0 + speed_node = find_speed_node(placemark) + speed_node ? speed_node.text.to_f.round(1) : 0.0 rescue StandardError 0.0 end + def find_speed_node(placemark) + REXML::XPath.first(placemark, ".//Data[@name='speed']/value") || + REXML::XPath.first(placemark, ".//Data[@name='Speed']/value") || + REXML::XPath.first(placemark, ".//Data[@name='velocity']/value") + end + def extract_extended_data(placemark) data = {} + data.merge!(extract_name_and_description(placemark)) + data.merge!(extract_custom_data_fields(placemark)) + data + rescue StandardError => e + Rails.logger.warn("Failed to extract extended data: #{e.message}") + {} + end + + def extract_name_and_description(placemark) + data = {} - # Extract name if present name_node = REXML::XPath.first(placemark, './/name') data['name'] = name_node.text.strip if name_node - # Extract description if present desc_node = REXML::XPath.first(placemark, './/description') data['description'] = desc_node.text.strip if desc_node - # Extract all ExtendedData/Data elements + data + end + + def extract_custom_data_fields(placemark) + data = {} + REXML::XPath.each(placemark, './/ExtendedData/Data') do |data_node| name = data_node.attributes['name'] value_node = REXML::XPath.first(data_node, './value') @@ -251,26 +320,29 @@ class Kml::Importer end data - rescue StandardError => e - Rails.logger.warn("Failed to extract extended data: #{e.message}") - {} end def bulk_insert_points(batch) - unique_batch = batch.uniq { |record| [record[:lonlat], record[:timestamp], record[:user_id]] } + unique_batch = deduplicate_batch(batch) + upsert_points(unique_batch) + broadcast_import_progress(import, unique_batch.size) + rescue StandardError => e + create_notification("Failed to process KML file: #{e.message}") + end + def deduplicate_batch(batch) + batch.uniq { |record| [record[:lonlat], record[:timestamp], record[:user_id]] } + end + + def upsert_points(batch) # rubocop:disable Rails/SkipsModelValidations Point.upsert_all( - unique_batch, + batch, unique_by: %i[lonlat timestamp user_id], returning: false, on_duplicate: :skip ) # rubocop:enable Rails/SkipsModelValidations - - broadcast_import_progress(import, unique_batch.size) - rescue StandardError => e - create_notification("Failed to process KML file: #{e.message}") end def create_notification(message)