dawarich/app/services/imports/source_detector.rb

259 lines
6.9 KiB
Ruby
Raw Normal View History

2025-08-22 13:10:40 -04:00
# frozen_string_literal: true
class Imports::SourceDetector
# Raised by #detect_source! when none of the known formats matches the input.
class UnknownSourceError < StandardError; end
# Declarative signatures for the JSON-based import formats, keyed by the
# symbol that #detect_source returns. Entries are tried in insertion order.
#
# Rule vocabulary, as interpreted by the matcher methods of this class:
#   required_keys:        every listed key must exist on the top-level Hash
#   required_values:      every listed key must equal the given value
#   nested_patterns:      at least ONE key path must resolve to a non-nil value
#                         (Integer steps index Arrays, String steps index Hashes)
#   structure: :array     the top-level JSON value must be an Array
#   alternative_patterns: list of sub-rules; the format matches if any one does
#
# NOTE(review): the :owntracks entry is skipped by #detect_source, and its
# :structure / :line_pattern keys do not appear to be read by the matcher
# methods in this file — confirm before relying on them.
DETECTION_RULES = {
  google_semantic_history: {
    required_keys: ['timelineObjects'],
    nested_patterns: [
      ['timelineObjects', 0, 'activitySegment'],
      ['timelineObjects', 0, 'placeVisit']
    ]
  },
  google_records: {
    required_keys: ['locations'],
    nested_patterns: [
      ['locations', 0, 'latitudeE7'],
      ['locations', 0, 'longitudeE7']
    ]
  },
  google_phone_takeout: {
    alternative_patterns: [
      # Pattern 1: Object with semanticSegments
      {
        required_keys: ['semanticSegments'],
        nested_patterns: [['semanticSegments', 0, 'startTime']]
      },
      # Pattern 2: Object with rawSignals
      {
        required_keys: ['rawSignals']
      },
      # Pattern 3: Array format with visit/activity objects
      {
        structure: :array,
        nested_patterns: [
          [0, 'visit', 'topCandidate', 'placeLocation'],
          [0, 'activity']
        ]
      }
    ]
  },
  geojson: {
    required_keys: %w[type features],
    required_values: { 'type' => 'FeatureCollection' },
    nested_patterns: [
      ['features', 0, 'type'],
      ['features', 0, 'geometry'],
      ['features', 0, 'properties']
    ]
  },
  owntracks: {
    structure: :rec_file_lines,
    line_pattern: /"_type":"location"/
  }
}.freeze
# Builds a detector for one candidate import.
#
# @param file_content [String] the content (or leading part of it) to inspect
# @param filename [String, nil] original file name, used for extension checks
# @param file_path [String, nil] path on disk; when given and present, the
#   detection helpers read from it instead of relying on file_content
def initialize(file_content, filename = nil, file_path = nil)
  @file_path = file_path
  @filename = filename
  @file_content = file_content
end
2025-08-22 14:40:06 -04:00
# Builds a detector from a file on disk, keeping only the first 2 KiB of the
# file in memory as the content sample.
#
# @param file_path [String] path of the file to inspect
# @return [Imports::SourceDetector]
def self.new_from_file_header(file_path)
  filename = File.basename(file_path)

  # IO#read(length) returns nil at EOF, i.e. for an empty file; normalise to
  # an empty string so later String calls on the sample cannot hit nil.
  header_content = File.open(file_path, 'rb') { |f| f.read(2048) }.to_s

  new(header_content, filename, file_path)
end
# Identifies the import format of the file.
#
# The file-type sniffers (GPX, KML/KMZ, OwnTracks) run first; only if none of
# them matches is the content parsed as JSON and compared against
# DETECTION_RULES in declaration order.
#
# @return [Symbol, nil] :gpx, :kml, :owntracks, a key of DETECTION_RULES, or
#   nil when nothing matches or the content is not parseable JSON
def detect_source
  return :gpx if gpx_file?
  return :kml if kml_file?
  return :owntracks if owntracks_file?

  json_data = parse_json
  return nil unless json_data

  # :owntracks was already decided by owntracks_file? above, so its rule is
  # excluded from the JSON matching pass.
  matched = DETECTION_RULES.find do |format, rules|
    format != :owntracks && matches_format?(json_data, rules)
  end
  matched&.first
end
# Strict variant of #detect_source.
#
# @return [Symbol] the detected format
# @raise [UnknownSourceError] when no format could be detected
def detect_source!
  detect_source || raise(UnknownSourceError, 'Unable to detect file format')
end
# Everything from here on is an implementation detail of the detector.
private
# Readers for the three constructor arguments, used by the helpers below.
attr_reader :file_content, :filename, :file_path
# Whether the input looks like a GPX document.
#
# Requires a .gpx file name (case-insensitive) AND content that starts, after
# leading whitespace, with an XML declaration or a <gpx tag and contains
# "<gpx" somewhere in the inspected sample.
#
# @return [Boolean]
def gpx_file?
  return false unless filename&.downcase&.end_with?('.gpx')

  sample =
    if file_path && File.exist?(file_path)
      # Only the first 1 KiB of the file is inspected.
      File.open(file_path, 'rb') { |f| f.read(1024) }
    else
      file_content
    end
  # read(1024) yields nil for an empty file, and file_content may be nil.
  return false if sample.nil?

  sample.lstrip.start_with?('<?xml', '<gpx') && sample.include?('<gpx')
end
0.36.0 (#1952) * Implement OmniAuth GitHub authentication * Fix omniauth GitHub scope to include user email access * Remove margin-bottom * Implement Google OAuth2 authentication * Implement OIDC authentication for Dawarich using omniauth_openid_connect gem. * Add patreon account linking and patron checking service * Update docker-compose.yml to use boolean values instead of strings * Add support for KML files * Add tests * Update changelog * Remove patreon OAuth integration * Move omniauthable to a concern * Update an icon in integrations * Update changelog * Update app version * Fix family location sharing toggle * Move family location sharing to its own controller * Update changelog * Implement basic tagging functionality for places, allowing users to categorize and label places with custom tags. * Add places management API and tags feature * Add some changes related to places management feature * Fix some tests * Fix sometests * Add places layer * Update places layer to use Leaflet.Control.Layers.Tree for hierarchical layer control * Rework tag form * Add hashtag * Add privacy zones to tags * Add notes to places and manage place tags * Update changelog * Update e2e tests * Extract tag serializer to its own file * Fix some tests * Fix tags request specs * Fix some tests * Fix rest of the tests * Revert some changes * Add missing specs * Revert changes in place export/import code * Fix some specs * Fix PlaceFinder to only consider global places when finding existing places * Fix few more specs * Fix visits creator spec * Fix last tests * Update place creating modal * Add home location based on "Home" tagged place * Save enabled tag layers * Some fixes * Fix bug where enabling place tag layers would trigger saving enabled layers, overwriting with incomplete data * Update migration to use disable_ddl_transaction! 
and add up/down methods * Fix tag layers restoration and filtering logic * Update OIDC auto-registration and email/password registration settings * Fix potential xss
2025-11-24 13:45:09 -05:00
# Whether the input looks like a KML document or a KMZ archive.
#
# Requires a .kml or .kmz file name (case-insensitive). For .kmz the sample
# must begin with the ZIP signature "PK"; for .kml it must start, after
# leading whitespace, with an XML declaration or a <kml tag and contain
# "<kml" somewhere in the inspected sample.
#
# @return [Boolean]
def kml_file?
  lowered_name = filename&.downcase
  return false unless lowered_name&.end_with?('.kml', '.kmz')

  sample =
    if file_path && File.exist?(file_path)
      # Only the first 1 KiB of the file is inspected.
      File.open(file_path, 'rb') { |f| f.read(1024) }
    else
      file_content
    end
  # read(1024) yields nil for an empty file, and file_content may be nil.
  return false if sample.nil?

  # KMZ files are ZIP archives; ZIP files start with "PK" (0x50 0x4B).
  return sample[0, 2] == 'PK' if lowered_name.end_with?('.kmz')

  sample.lstrip.start_with?('<?xml', '<kml') && sample.include?('<kml')
end
2025-08-22 13:10:40 -04:00
# Whether the input looks like an OwnTracks export.
#
# A .rec file name (case-insensitive) is accepted on its own. Otherwise the
# inspected sample must contain the literal text "_type":"location" (with the
# quotes, no spaces).
#
# @return [Boolean]
def owntracks_file?
  return false unless filename
  # Extension check first: it needs no I/O.
  return true if filename.downcase.end_with?('.rec')

  sample =
    if file_path && File.exist?(file_path)
      # Only the first 2 KiB of the file is inspected.
      File.open(file_path, 'rb') { |f| f.read(2048) }
    else
      file_content
    end
  # read(2048) yields nil for an empty file, and file_content may be nil.
  return false if sample.nil?

  # The marker contains no newline, so a whole-sample search is equivalent to
  # scanning the sample line by line.
  sample.include?('"_type":"location"')
end
# Parses the input as JSON.
#
# When file_path points at an existing file, Oj loads that file; otherwise
# the in-memory file_content is parsed. If that fails with a parse error and
# the in-memory sample is shorter than 2048 bytes, one more attempt is made
# on the first 4096 bytes of the file.
#
# @return [Object, nil] the parsed JSON value, or nil when nothing parseable
#   was found
def parse_json
  if file_path && File.exist?(file_path)
    Oj.load_file(file_path, mode: :compat)
  else
    Oj.load(file_content, mode: :compat)
  end
rescue Oj::ParseError, JSON::ParserError
  # The fallback needs a readable file; file_content may be nil (e.g. an
  # empty file read with a length), so measure it via to_s.
  return nil unless file_path && File.exist?(file_path)
  return nil unless file_content.to_s.length < 2048

  begin
    partial_content = File.open(file_path, 'rb') { |f| f.read(4096) }
    # read(4096) returns nil for an empty file; there is nothing to parse then.
    partial_content && Oj.load(partial_content, mode: :compat)
  rescue Oj::ParseError, JSON::ParserError
    nil
  end
end
# Tests parsed JSON against one entry of DETECTION_RULES.
#
# A rule carrying :alternative_patterns matches when any of its sub-patterns
# matches; any other rule is treated as a single pattern.
#
# @param json_data [Object] parsed JSON value
# @param rules [Hash] one rule definition
# @return [Boolean]
def matches_format?(json_data, rules)
  alternatives = rules[:alternative_patterns]
  return matches_pattern?(json_data, rules) unless alternatives

  alternatives.any? { |candidate| matches_pattern?(json_data, candidate) }
end
# Tests parsed JSON against a single pattern.
#
# Every constraint present in the pattern must hold; constraints that are
# absent are skipped. Checks run in this order: structure, required keys,
# required values, nested patterns.
#
# @param json_data [Object] parsed JSON value
# @param pattern [Hash] pattern with optional :structure, :required_keys,
#   :required_values and :nested_patterns entries
# @return [Boolean]
def matches_pattern?(json_data, pattern)
  return false unless structure_matches?(json_data, pattern[:structure])

  required_keys = pattern[:required_keys]
  return false if required_keys && !has_required_keys?(json_data, required_keys)

  required_values = pattern[:required_values]
  return false if required_values && !has_required_values?(json_data, required_values)

  nested_patterns = pattern[:nested_patterns]
  return false if nested_patterns && !has_nested_patterns?(json_data, nested_patterns)

  true
end
# Checks the top-level shape demanded by a pattern.
#
# Only :array imposes a constraint (the data must be an Array); nil and any
# other value place no restriction on the data.
#
# @param json_data [Object] parsed JSON value
# @param required_structure [Symbol, nil]
# @return [Boolean]
def structure_matches?(json_data, required_structure)
  return true unless required_structure == :array

  json_data.is_a?(Array)
end
# Whether json_data is a Hash that contains every key in +keys+.
#
# @param json_data [Object] parsed JSON value
# @param keys [Array] keys that must all be present
# @return [Boolean] false for any non-Hash data
def has_required_keys?(json_data, keys)
  json_data.is_a?(Hash) && keys.all? { |name| json_data.key?(name) }
end
# Whether json_data is a Hash in which every key of +values+ maps to the
# expected value (compared with ==).
#
# @param json_data [Object] parsed JSON value
# @param values [Hash] key => expected value
# @return [Boolean] false for any non-Hash data
def has_required_values?(json_data, values)
  return false unless json_data.is_a?(Hash)

  values.none? { |name, expected| json_data[name] != expected }
end
# Whether at least one of the given key paths resolves inside json_data.
# Note the "any" semantics: a single resolving path is enough.
#
# @param json_data [Object] parsed JSON value
# @param patterns [Array<Array>] list of key paths
# @return [Boolean]
def has_nested_patterns?(json_data, patterns)
  patterns.any? do |key_path|
    nested_key_exists?(json_data, key_path)
  end
end
# Walks +key_path+ through nested Arrays and Hashes and reports whether it
# ends on a non-nil value.
#
# Integer steps index into Arrays and must be below the Array's length; any
# step is looked up as a key when the current value is a Hash. The walk fails
# as soon as a step cannot be applied to the current value.
#
# @param data [Object] parsed JSON value
# @param key_path [Array<String, Integer>] steps to follow
# @return [Boolean]
def nested_key_exists?(data, key_path)
  leaf = key_path.reduce(data) do |current, key|
    case current
    when Array
      # A non-Integer step cannot index an Array; comparing it with the
      # length would raise ArgumentError, so treat it as "not found".
      return false unless key.is_a?(Integer) && key < current.length

      current[key]
    when Hash
      return false unless current.key?(key)

      current[key]
    else
      # nil, false and scalars cannot be descended into.
      return false
    end
  end

  !leaf.nil?
end
end