From a93b49ee801addc16e4fcf00878fce4273a7964f Mon Sep 17 00:00:00 2001 From: Eugene Burmakin Date: Sun, 23 Mar 2025 18:37:10 +0100 Subject: [PATCH 01/11] Use Active Storage to import GPX files --- Gemfile | 1 - app/controllers/imports_controller.rb | 12 ++-------- app/jobs/import/process_job.rb | 11 +++++++++ app/jobs/import_job.rb | 12 ---------- app/models/import.rb | 4 +++- app/models/user.rb | 15 ++++++------ app/services/gpx/track_importer.rb | 19 ++++++++------- app/services/immich/import_geodata.rb | 2 +- app/services/imports/watcher.rb | 2 +- app/services/photoprism/import_geodata.rb | 2 +- app/uploaders/import_uploader.rb | 5 ---- config/shrine.rb | 13 ---------- ...te_active_storage_tables.active_storage.rb | 24 +++++++++++-------- .../process_job_spec.rb} | 4 ++-- spec/models/user_spec.rb | 2 +- spec/requests/imports_spec.rb | 4 ++-- spec/services/gpx/track_importer_spec.rb | 8 +++++-- spec/services/immich/import_geodata_spec.rb | 8 +++---- spec/services/imports/watcher_spec.rb | 6 ++--- .../photoprism/import_geodata_spec.rb | 8 +++---- 20 files changed, 74 insertions(+), 88 deletions(-) create mode 100644 app/jobs/import/process_job.rb delete mode 100644 app/jobs/import_job.rb delete mode 100644 app/uploaders/import_uploader.rb delete mode 100644 config/shrine.rb rename spec/jobs/{import_job_spec.rb => import/process_job_spec.rb} (89%) diff --git a/Gemfile b/Gemfile index 4ed5dad3..564515c4 100644 --- a/Gemfile +++ b/Gemfile @@ -27,7 +27,6 @@ gem 'rgeo' gem 'rgeo-activerecord' gem 'rswag-api' gem 'rswag-ui' -gem 'shrine', '~> 3.6' gem 'sidekiq' gem 'sidekiq-cron' gem 'sidekiq-limit_fetch' diff --git a/app/controllers/imports_controller.rb b/app/controllers/imports_controller.rb index a6359e67..3b363e6b 100644 --- a/app/controllers/imports_controller.rb +++ b/app/controllers/imports_controller.rb @@ -29,20 +29,12 @@ class ImportsController < ApplicationController source: params[:import][:source] ) - file = File.read(file) + import.file.attach(file) - raw_data = - case params[:import][:source] - when 'gpx' then Hash.from_xml(file) - when 'owntracks' then OwnTracks::RecParser.new(file).call - else JSON.parse(file) - end - - import.update(raw_data:) import.id end - import_ids.each { ImportJob.perform_later(current_user.id, _1) } + import_ids.each { Import::ProcessJob.perform_later(_1) } redirect_to imports_url, notice: "#{files.size} files are queued to be imported in background", status: :see_other rescue StandardError => e diff --git a/app/jobs/import/process_job.rb b/app/jobs/import/process_job.rb new file mode 100644 index 00000000..4529d5f7 --- /dev/null +++ b/app/jobs/import/process_job.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +class Import::ProcessJob < ApplicationJob + queue_as :imports + + def perform(import_id) + import = Import.find(import_id) + + import.process! + end +end diff --git a/app/jobs/import_job.rb b/app/jobs/import_job.rb deleted file mode 100644 index a07cfa46..00000000 --- a/app/jobs/import_job.rb +++ /dev/null @@ -1,12 +0,0 @@ -# frozen_string_literal: true - -class ImportJob < ApplicationJob - queue_as :imports - - def perform(user_id, import_id) - user = User.find(user_id) - import = user.imports.find(import_id) - - import.process! - end -end diff --git a/app/models/import.rb b/app/models/import.rb index 045e8b5f..89a2387f 100644 --- a/app/models/import.rb +++ b/app/models/import.rb @@ -6,7 +6,9 @@ class Import < ApplicationRecord delegate :count, to: :points, prefix: true - include ImportUploader::Attachment(:raw) + has_one_attached :file + + after_commit -> { Import::ProcessJob.perform_later(id) }, on: :create enum :source, { google_semantic_history: 0, owntracks: 1, google_records: 2, diff --git a/app/models/user.rb b/app/models/user.rb index ee4d84f8..3a1c6071 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -15,6 +15,8 @@ class User < ApplicationRecord has_many :places, through: :visits has_many :trips, dependent: :destroy + has_many_attached :import_files + after_create :create_api_key after_create :import_sample_points after_commit :activate, on: :create, if: -> { DawarichSettings.self_hosted? } @@ -123,16 +125,15 @@ class User < ApplicationRecord Rails.env.production? || (Rails.env.test? && ENV['IMPORT_SAMPLE_POINTS']) - raw_data = Hash.from_xml( - File.read(Rails.root.join('lib/assets/sample_points.gpx')) - ) - import = imports.create( name: 'DELETE_ME_this_is_a_demo_import_DELETE_ME', - source: 'gpx', - raw_data: + source: 'gpx' ) - ImportJob.perform_later(id, import.id) + import.file.attach( + Rack::Test::UploadedFile.new( + Rails.root.join('lib/assets/sample_points.gpx'), 'application/xml' + ) + ) end end diff --git a/app/services/gpx/track_importer.rb b/app/services/gpx/track_importer.rb index 62f327cc..ee2c7338 100644 --- a/app/services/gpx/track_importer.rb +++ b/app/services/gpx/track_importer.rb @@ -3,22 +3,25 @@ class Gpx::TrackImporter include Imports::Broadcaster - attr_reader :import, :json, :user_id + attr_reader :import, :user_id def initialize(import, user_id) @import = import - @json = import.raw_data @user_id = user_id end def call - tracks = json['gpx']['trk'] - tracks_arr = tracks.is_a?(Array) ? tracks : [tracks] + import.file.open do |file| + json = Hash.from_xml(file) - points = tracks_arr.map { parse_track(_1) }.flatten.compact - points_data = points.map.with_index(1) { |point, index| prepare_point(point, index) }.compact + tracks = json['gpx']['trk'] + tracks_arr = tracks.is_a?(Array) ? tracks : [tracks] - bulk_insert_points(points_data) + points = tracks_arr.map { parse_track(_1) }.flatten.compact + points_data = points.map { prepare_point(_1) }.compact + + bulk_insert_points(points_data) + end end private @@ -32,7 +35,7 @@ class Gpx::TrackImporter segments_array.compact.map { |segment| segment['trkpt'] } end - def prepare_point(point, index) + def prepare_point(point) return if point['lat'].blank? || point['lon'].blank? || point['time'].blank? { diff --git a/app/services/immich/import_geodata.rb b/app/services/immich/import_geodata.rb index 469761d6..63a9232b 100644 --- a/app/services/immich/import_geodata.rb +++ b/app/services/immich/import_geodata.rb @@ -23,7 +23,7 @@ class Immich::ImportGeodata import.raw_data = immich_data_json import.save! - ImportJob.perform_later(user.id, import.id) + Import::ProcessJob.perform_later(import.id) end private diff --git a/app/services/imports/watcher.rb b/app/services/imports/watcher.rb index de9ca262..f29cacb8 100644 --- a/app/services/imports/watcher.rb +++ b/app/services/imports/watcher.rb @@ -51,7 +51,7 @@ class Imports::Watcher import.save! - ImportJob.perform_later(user.id, import.id) + Import::ProcessJob.perform_later(import.id) end def find_or_initialize_import(user, file_name) diff --git a/app/services/photoprism/import_geodata.rb b/app/services/photoprism/import_geodata.rb index 182681e6..2ad5fa6e 100644 --- a/app/services/photoprism/import_geodata.rb +++ b/app/services/photoprism/import_geodata.rb @@ -24,7 +24,7 @@ class Photoprism::ImportGeodata return create_import_failed_notification(import.name) unless import.new_record? import.update!(raw_data: json_data) - ImportJob.perform_later(user.id, import.id) + Import::ProcessJob.perform_later(import.id) end def find_or_create_import(json_data) diff --git a/app/uploaders/import_uploader.rb b/app/uploaders/import_uploader.rb deleted file mode 100644 index b4513d77..00000000 --- a/app/uploaders/import_uploader.rb +++ /dev/null @@ -1,5 +0,0 @@ -# frozen_string_literal: true - -class ImportUploader < Shrine - # plugins and uploading logic -end diff --git a/config/shrine.rb b/config/shrine.rb deleted file mode 100644 index 42fb0ef9..00000000 --- a/config/shrine.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -require 'shrine' -require 'shrine/storage/file_system' - -Shrine.storages = { - cache: Shrine::Storage::FileSystem.new('public', prefix: 'uploads/cache'), # temporary - store: Shrine::Storage::FileSystem.new('public', prefix: 'uploads') # permanent -} - -Shrine.plugin :activerecord # loads Active Record integration -Shrine.plugin :cached_attachment_data # enables retaining cached file across form redisplays -Shrine.plugin :restore_cached_data # extracts metadata for assigned cached files diff --git a/db/migrate/20240324161309_create_active_storage_tables.active_storage.rb b/db/migrate/20240324161309_create_active_storage_tables.active_storage.rb index e4706aa2..3b750c43 100644 --- a/db/migrate/20240324161309_create_active_storage_tables.active_storage.rb +++ b/db/migrate/20240324161309_create_active_storage_tables.active_storage.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + # This migration comes from active_storage (originally 20170806125915) class CreateActiveStorageTables < ActiveRecord::Migration[7.0] def change @@ -19,7 +21,7 @@ class CreateActiveStorageTables < ActiveRecord::Migration[7.0] t.datetime :created_at, null: false end - t.index [ :key ], unique: true + t.index [:key], unique: true end create_table :active_storage_attachments, id: primary_key_type do |t| @@ -33,7 +35,8 @@ class CreateActiveStorageTables < ActiveRecord::Migration[7.0] t.datetime :created_at, null: false end - t.index [ :record_type, :record_id, :name, :blob_id ], name: :index_active_storage_attachments_uniqueness, unique: true + t.index %i[record_type record_id name blob_id], name: :index_active_storage_attachments_uniqueness, +unique: true t.foreign_key :active_storage_blobs, column: :blob_id end @@ -41,17 +44,18 @@ class CreateActiveStorageTables < ActiveRecord::Migration[7.0] t.belongs_to :blob, null: false, index: false, type: foreign_key_type t.string :variation_digest, null: false - t.index [ :blob_id, :variation_digest ], name: :index_active_storage_variant_records_uniqueness, unique: true + t.index %i[blob_id variation_digest], name: :index_active_storage_variant_records_uniqueness, unique: true t.foreign_key :active_storage_blobs, column: :blob_id end end private - def primary_and_foreign_key_types - config = Rails.configuration.generators - setting = config.options[config.orm][:primary_key_type] - primary_key_type = setting || :primary_key - foreign_key_type = setting || :bigint - [primary_key_type, foreign_key_type] - end + + def primary_and_foreign_key_types + config = Rails.configuration.generators + setting = config.options[config.orm][:primary_key_type] + primary_key_type = setting || :primary_key + foreign_key_type = setting || :bigint + [primary_key_type, foreign_key_type] + end end diff --git a/spec/jobs/import_job_spec.rb b/spec/jobs/import/process_job_spec.rb similarity index 89% rename from spec/jobs/import_job_spec.rb rename to spec/jobs/import/process_job_spec.rb index b6655a0c..d805ac9f 100644 --- a/spec/jobs/import_job_spec.rb +++ b/spec/jobs/import/process_job_spec.rb @@ -2,9 +2,9 @@ require 'rails_helper' -RSpec.describe ImportJob, type: :job do +RSpec.describe Import::ProcessJob, type: :job do describe '#perform' do - subject(:perform) { described_class.new.perform(user.id, import.id) } + subject(:perform) { described_class.new.perform(import.id) } let(:user) { create(:user) } let!(:import) { create(:import, user:, name: 'owntracks_export.json') } diff --git a/spec/models/user_spec.rb b/spec/models/user_spec.rb index 6fecebfd..3e808a4d 100644 --- a/spec/models/user_spec.rb +++ b/spec/models/user_spec.rb @@ -72,7 +72,7 @@ RSpec.describe User, type: :model do expect(user.imports.first.name).to eq('DELETE_ME_this_is_a_demo_import_DELETE_ME') expect(user.imports.first.source).to eq('gpx') - expect(ImportJob).to have_been_enqueued.with(user.id, user.imports.first.id) + expect(Import::ProcessJob).to have_been_enqueued.with(user.imports.first.id) end end end diff --git a/spec/requests/imports_spec.rb b/spec/requests/imports_spec.rb index 57074735..d9703a63 100644 --- a/spec/requests/imports_spec.rb +++ b/spec/requests/imports_spec.rb @@ -46,7 +46,7 @@ RSpec.describe 'Imports', type: :request do it 'queues import job' do expect do post imports_path, params: { import: { source: 'owntracks', files: [file] } } - end.to have_enqueued_job(ImportJob).on_queue('imports').at_least(1).times + end.to have_enqueued_job(Import::ProcessJob).on_queue('imports').at_least(1).times end it 'creates a new import' do @@ -64,7 +64,7 @@ RSpec.describe 'Imports', type: :request do it 'queues import job' do expect do post imports_path, params: { import: { source: 'gpx', files: [file] } } - end.to have_enqueued_job(ImportJob).on_queue('imports').at_least(1).times + end.to have_enqueued_job(Import::ProcessJob).on_queue('imports').at_least(1).times end it 'creates a new import' do diff --git a/spec/services/gpx/track_importer_spec.rb b/spec/services/gpx/track_importer_spec.rb index bef491c9..5aeb7117 100644 --- a/spec/services/gpx/track_importer_spec.rb +++ b/spec/services/gpx/track_importer_spec.rb @@ -8,8 +8,12 @@ RSpec.describe Gpx::TrackImporter do let(:user) { create(:user) } let(:file_path) { Rails.root.join('spec/fixtures/files/gpx/gpx_track_single_segment.gpx') } - let(:raw_data) { Hash.from_xml(File.read(file_path)) } - let(:import) { create(:import, user:, name: 'gpx_track.gpx', raw_data:) } + let(:file) { Rack::Test::UploadedFile.new(file_path, 'application/xml') } + let(:import) { create(:import, user:, name: 'gpx_track.gpx', source: 'gpx') } + + before do + import.file.attach(file) + end context 'when file has a single segment' do it 'creates points' do diff --git a/spec/services/immich/import_geodata_spec.rb b/spec/services/immich/import_geodata_spec.rb index b5460526..03147d6b 100644 --- a/spec/services/immich/import_geodata_spec.rb +++ b/spec/services/immich/import_geodata_spec.rb @@ -88,8 +88,8 @@ RSpec.describe Immich::ImportGeodata do expect { service }.to change { Import.count }.by(1) end - it 'enqueues ImportJob' do - expect(ImportJob).to receive(:perform_later) + it 'enqueues Import::ProcessJob' do + expect(Import::ProcessJob).to receive(:perform_later) service end @@ -101,8 +101,8 @@ RSpec.describe Immich::ImportGeodata do expect { service }.not_to(change { Import.count }) end - it 'does not enqueue ImportJob' do - expect(ImportJob).to_not receive(:perform_later) + it 'does not enqueue Import::ProcessJob' do + expect(Import::ProcessJob).to_not receive(:perform_later) service end diff --git a/spec/services/imports/watcher_spec.rb b/spec/services/imports/watcher_spec.rb index c155d23c..ac3041c8 100644 --- a/spec/services/imports/watcher_spec.rb +++ b/spec/services/imports/watcher_spec.rb @@ -23,7 +23,7 @@ RSpec.describe Imports::Watcher do end it 'enqueues importing jobs for the user' do - expect { service }.to have_enqueued_job(ImportJob).exactly(6).times + expect { service }.to have_enqueued_job(Import::ProcessJob).exactly(6).times end context 'when the import already exists' do @@ -41,8 +41,8 @@ RSpec.describe Imports::Watcher do end context 'when user does not exist' do - it 'does not call ImportJob' do - expect(ImportJob).not_to receive(:perform_later) + it 'does not call Import::ProcessJob' do + expect(Import::ProcessJob).not_to receive(:perform_later) service end diff --git a/spec/services/photoprism/import_geodata_spec.rb b/spec/services/photoprism/import_geodata_spec.rb index 341348fc..c0c4190b 100644 --- a/spec/services/photoprism/import_geodata_spec.rb +++ b/spec/services/photoprism/import_geodata_spec.rb @@ -154,8 +154,8 @@ RSpec.describe Photoprism::ImportGeodata do expect { service }.to change { Import.count }.by(1) end - it 'enqueues ImportJob' do - expect(ImportJob).to receive(:perform_later) + it 'enqueues Import::ProcessJob' do + expect(Import::ProcessJob).to receive(:perform_later) service end @@ -167,8 +167,8 @@ RSpec.describe Photoprism::ImportGeodata do expect { service }.not_to(change { Import.count }) end - it 'does not enqueue ImportJob' do - expect(ImportJob).to_not receive(:perform_later) + it 'does not enqueue Import::ProcessJob' do + expect(Import::ProcessJob).to_not receive(:perform_later) service end From f3b98ac83dadce8c33078037e786f81675221b83 Mon Sep 17 00:00:00 2001 From: Eugene Burmakin Date: Sun, 23 Mar 2025 19:00:04 +0100 Subject: [PATCH 02/11] Use attached import file to import own tracks data --- app/services/gpx/track_importer.rb | 2 +- app/services/own_tracks/importer.rb | 25 +++++++++++++---------- app/services/own_tracks/rec_parser.rb | 7 ++----- spec/factories/imports.rb | 3 +-- spec/services/own_tracks/importer_spec.rb | 8 +++++++- 5 files changed, 25 insertions(+), 20 deletions(-) diff --git a/app/services/gpx/track_importer.rb b/app/services/gpx/track_importer.rb index ee2c7338..9abd1a56 100644 --- a/app/services/gpx/track_importer.rb +++ b/app/services/gpx/track_importer.rb @@ -11,7 +11,7 @@ class Gpx::TrackImporter end def call - import.file.open do |file| + import.file.download do |file| json = Hash.from_xml(file) tracks = json['gpx']['trk'] diff --git a/app/services/own_tracks/importer.rb b/app/services/own_tracks/importer.rb index 20dbc706..75cd88ab 100644 --- a/app/services/own_tracks/importer.rb +++ b/app/services/own_tracks/importer.rb @@ -3,25 +3,28 @@ class OwnTracks::Importer include Imports::Broadcaster - attr_reader :import, :data, :user_id + attr_reader :import, :user_id def initialize(import, user_id) @import = import - @data = import.raw_data @user_id = user_id end def call - points_data = data.map.with_index(1) do |point, index| - OwnTracks::Params.new(point).call.merge( - import_id: import.id, - user_id: user_id, - created_at: Time.current, - updated_at: Time.current - ) - end + import.file.download do |file| + parsed_data = OwnTracks::RecParser.new(file).call - bulk_insert_points(points_data) + points_data = parsed_data.map do |point| + OwnTracks::Params.new(point).call.merge( + import_id: import.id, + user_id: user_id, + created_at: Time.current, + updated_at: Time.current + ) + end + + bulk_insert_points(points_data) + end end private diff --git a/app/services/own_tracks/rec_parser.rb b/app/services/own_tracks/rec_parser.rb index 7e502263..7e3550af 100644 --- a/app/services/own_tracks/rec_parser.rb +++ b/app/services/own_tracks/rec_parser.rb @@ -10,11 +10,8 @@ class OwnTracks::RecParser def call file.split("\n").map do |line| parts = line.split("\t") - if parts.size > 2 && parts[1].strip == '*' - JSON.parse(parts[2]) - else - nil - end + + Oj.load(parts[2]) if parts.size > 2 && parts[1].strip == '*' end.compact end end diff --git a/spec/factories/imports.rb b/spec/factories/imports.rb index 05be1e9f..07dec894 100644 --- a/spec/factories/imports.rb +++ b/spec/factories/imports.rb @@ -3,8 +3,7 @@ FactoryBot.define do factory :import do user - name { 'MARCH_2024.json' } + name { 'owntracks_export.json' } source { Import.sources[:owntracks] } - raw_data { OwnTracks::RecParser.new(File.read('spec/fixtures/files/owntracks/2024-03.rec')).call } end end diff --git a/spec/services/own_tracks/importer_spec.rb b/spec/services/own_tracks/importer_spec.rb index 2e3e16e9..0800d0b8 100644 --- a/spec/services/own_tracks/importer_spec.rb +++ b/spec/services/own_tracks/importer_spec.rb @@ -7,7 +7,13 @@ RSpec.describe OwnTracks::Importer do subject(:parser) { described_class.new(import, user.id).call } let(:user) { create(:user) } - let(:import) { create(:import, user:, name: 'owntracks_export.json') } + let(:import) { create(:import, user:, name: '2024-03.rec') } + let(:file_path) { Rails.root.join('spec/fixtures/files/owntracks/2024-03.rec') } + let(:file) { Rack::Test::UploadedFile.new(file_path, 'text/plain') } + + before do + import.file.attach(io: File.open(file_path), filename: '2024-03.rec', content_type: 'text/plain') + end context 'when file exists' do it 'creates points' do From 5758f9a923620758f10724e2184ea2fc7db6cf7d Mon Sep 17 00:00:00 2001 From: Eugene Burmakin Date: Sun, 23 Mar 2025 19:13:59 +0100 Subject: [PATCH 03/11] Use attached file to import geojson and phone takeout --- app/services/geojson/import_parser.rb | 28 ++++++++----------- .../google_maps/phone_takeout_parser.rb | 14 ++++++---- app/services/photos/import_parser.rb | 10 +------ spec/services/geojson/import_parser_spec.rb | 8 ++++-- .../google_maps/phone_takeout_parser_spec.rb | 12 +++++--- 5 files changed, 34 insertions(+), 38 deletions(-) diff --git a/app/services/geojson/import_parser.rb b/app/services/geojson/import_parser.rb index 13b8651c..95edaf10 100644 --- a/app/services/geojson/import_parser.rb +++ b/app/services/geojson/import_parser.rb @@ -2,34 +2,28 @@ class Geojson::ImportParser include Imports::Broadcaster + include PointValidation - attr_reader :import, :json, :user_id + attr_reader :import, :user_id def initialize(import, user_id) @import = import - @json = import.raw_data @user_id = user_id end def call - data = Geojson::Params.new(json).call + import.file.download do |file| + json = Oj.load(file) - data.each.with_index(1) do |point, index| - next if point_exists?(point, user_id) + data = Geojson::Params.new(json).call - Point.create!(point.merge(user_id:, import_id: import.id)) + data.each.with_index(1) do |point, index| + next if point_exists?(point, user_id) - broadcast_import_progress(import, index) + Point.create!(point.merge(user_id:, import_id: import.id)) + + broadcast_import_progress(import, index) + end end end - - private - - def point_exists?(params, user_id) - Point.exists?( - lonlat: params[:lonlat], - timestamp: params[:timestamp], - user_id: - ) - end end diff --git a/app/services/google_maps/phone_takeout_parser.rb b/app/services/google_maps/phone_takeout_parser.rb index a30b34d3..97d4626c 100644 --- a/app/services/google_maps/phone_takeout_parser.rb +++ b/app/services/google_maps/phone_takeout_parser.rb @@ -48,13 +48,15 @@ class GoogleMaps::PhoneTakeoutParser raw_signals = [] raw_array = [] - if import.raw_data.is_a?(Array) - raw_array = parse_raw_array(import.raw_data) - else - if import.raw_data['semanticSegments'] - semantic_segments = parse_semantic_segments(import.raw_data['semanticSegments']) + import.file.download do |file| + json = Oj.load(file) + + if json.is_a?(Array) + raw_array = parse_raw_array(json) + else + semantic_segments = parse_semantic_segments(json['semanticSegments']) if json['semanticSegments'] + raw_signals = parse_raw_signals(json['rawSignals']) if json['rawSignals'] end - raw_signals = parse_raw_signals(import.raw_data['rawSignals']) if import.raw_data['rawSignals'] end semantic_segments + raw_signals + raw_array diff --git a/app/services/photos/import_parser.rb b/app/services/photos/import_parser.rb index 610681fb..b91a9ca3 100644 --- a/app/services/photos/import_parser.rb +++ b/app/services/photos/import_parser.rb @@ -2,7 +2,7 @@ class Photos::ImportParser include Imports::Broadcaster - + include PointValidation attr_reader :import, :json, :user_id def initialize(import, user_id) @@ -29,12 +29,4 @@ class Photos::ImportParser broadcast_import_progress(import, index) end - - def point_exists?(point, timestamp) - Point.exists?( - lonlat: "POINT(#{point['longitude']} #{point['latitude']})", - timestamp:, - user_id: - ) - end end diff --git a/spec/services/geojson/import_parser_spec.rb b/spec/services/geojson/import_parser_spec.rb index f485b5da..ba5f76e9 100644 --- a/spec/services/geojson/import_parser_spec.rb +++ b/spec/services/geojson/import_parser_spec.rb @@ -12,8 +12,12 @@ RSpec.describe Geojson::ImportParser do context 'when file content is an object' do let(:file_path) { Rails.root.join('spec/fixtures/files/geojson/export.json') } - let(:raw_data) { JSON.parse(File.read(file_path)) } - let(:import) { create(:import, user:, name: 'geojson.json', raw_data:) } + let(:file) { Rack::Test::UploadedFile.new(file_path, 'application/json') } + let(:import) { create(:import, user:, name: 'geojson.json', file:) } + + before do + import.file.attach(io: File.open(file_path), filename: 'geojson.json', content_type: 'application/json') + end it 'creates new points' do expect { service }.to change { Point.count }.by(10) diff --git a/spec/services/google_maps/phone_takeout_parser_spec.rb b/spec/services/google_maps/phone_takeout_parser_spec.rb index 3050abb7..ac2db8b7 100644 --- a/spec/services/google_maps/phone_takeout_parser_spec.rb +++ b/spec/services/google_maps/phone_takeout_parser_spec.rb @@ -8,11 +8,15 @@ RSpec.describe GoogleMaps::PhoneTakeoutParser do let(:user) { create(:user) } + before do + import.file.attach(io: File.open(file_path), filename: 'phone_takeout.json', content_type: 'application/json') + end + context 'when file content is an object' do # This file contains 3 duplicates let(:file_path) { Rails.root.join('spec/fixtures/files/google/phone-takeout.json') } - let(:raw_data) { JSON.parse(File.read(file_path)) } - let(:import) { create(:import, user:, name: 'phone_takeout.json', raw_data:) } + let(:file) { Rack::Test::UploadedFile.new(file_path, 'application/json') } + let(:import) { create(:import, user:, name: 'phone_takeout.json', file:) } context 'when file exists' do it 'creates points' do @@ -24,8 +28,8 @@ RSpec.describe GoogleMaps::PhoneTakeoutParser do context 'when file content is an array' do # This file contains 4 duplicates let(:file_path) { Rails.root.join('spec/fixtures/files/google/location-history.json') } - let(:raw_data) { JSON.parse(File.read(file_path)) } - let(:import) { create(:import, user:, name: 'phone_takeout.json', raw_data:) } + let(:file) { Rack::Test::UploadedFile.new(file_path, 'application/json') } + let(:import) { create(:import, user:, name: 'phone_takeout.json', file:) } context 'when file exists' do it 'creates points' do From b66e18818bce6b70440e8f55b88558597c633892 Mon Sep 17 00:00:00 2001 From: Eugene Burmakin Date: Sun, 23 Mar 2025 21:00:31 +0100 Subject: [PATCH 04/11] Fix rest of the tests and re-enable visit suggesting --- app/controllers/imports_controller.rb | 10 ++--- app/services/immich/import_geodata.rb | 9 +++-- app/services/imports/create.rb | 4 +- app/services/imports/watcher.rb | 47 ++++++----------------- app/services/photoprism/import_geodata.rb | 9 ++++- spec/jobs/import/process_job_spec.rb | 7 +++- spec/services/imports/create_spec.rb | 22 ++++++++++- 7 files changed, 58 insertions(+), 50 deletions(-) diff --git a/app/controllers/imports_controller.rb b/app/controllers/imports_controller.rb index 3b363e6b..24a90e51 100644 --- a/app/controllers/imports_controller.rb +++ b/app/controllers/imports_controller.rb @@ -23,19 +23,17 @@ class ImportsController < ApplicationController def create files = import_params[:files].reject(&:blank?) - import_ids = files.map do |file| - import = current_user.imports.create( + files.each do |file| + import = current_user.imports.build( name: file.original_filename, source: params[:import][:source] ) - import.file.attach(file) + import.file.attach(io: file, filename: file.original_filename, content_type: file.content_type) - import.id + import.save! end - import_ids.each { Import::ProcessJob.perform_later(_1) } - redirect_to imports_url, notice: "#{files.size} files are queued to be imported in background", status: :see_other rescue StandardError => e Import.where(user: current_user, name: files.map(&:original_filename)).destroy_all diff --git a/app/services/immich/import_geodata.rb b/app/services/immich/import_geodata.rb index 63a9232b..770a8087 100644 --- a/app/services/immich/import_geodata.rb +++ b/app/services/immich/import_geodata.rb @@ -20,10 +20,13 @@ class Immich::ImportGeodata create_import_failed_notification(import.name) and return unless import.new_record? - import.raw_data = immich_data_json - import.save! + import.file.attach( + io: StringIO.new(immich_data_json.to_json), + filename: file_name, + content_type: 'application/json' + ) - Import::ProcessJob.perform_later(import.id) + import.save! end private diff --git a/app/services/imports/create.rb b/app/services/imports/create.rb index e34661b1..39b28eb7 100644 --- a/app/services/imports/create.rb +++ b/app/services/imports/create.rb @@ -14,7 +14,7 @@ class Imports::Create create_import_finished_notification(import, user) schedule_stats_creating(user.id) - # schedule_visit_suggesting(user.id, import) # Disabled until places & visits are reworked + schedule_visit_suggesting(user.id, import) rescue StandardError => e create_import_failed_notification(import, user, e) end @@ -44,7 +44,7 @@ class Imports::Create start_at = Time.zone.at(points.first.timestamp) end_at = Time.zone.at(points.last.timestamp) - VisitSuggestingJob.perform_later(user_ids: [user_id], start_at:, end_at:) + VisitSuggestingJob.perform_later(user_id:, start_at:, end_at:) end def create_import_finished_notification(import, user) diff --git a/app/services/imports/watcher.rb b/app/services/imports/watcher.rb index f29cacb8..79e0a59c 100644 --- a/app/services/imports/watcher.rb +++ b/app/services/imports/watcher.rb @@ -16,7 +16,7 @@ class Imports::Watcher file_names = file_names(user_directory_path) file_names.each do |file_name| - process_file(user, user_directory_path, file_name) + create_import(user, user_directory_path, file_name) end end end @@ -26,49 +26,29 @@ class Imports::Watcher def user_directories Dir.entries(WATCHED_DIR_PATH).select do |entry| path = File.join(WATCHED_DIR_PATH, entry) + File.directory?(path) && !['.', '..'].include?(entry) end end - def find_user(file_name) - email = file_name.split('_').first - - User.find_by(email:) - end - def file_names(directory_path) Dir.entries(directory_path).select { |file| SUPPORTED_FORMATS.include?(File.extname(file)) } end - def process_file(user, directory_path, file_name) + def create_import(user, directory_path, file_name) file_path = File.join(directory_path, file_name) import = Import.find_or_initialize_by(user:, name: file_name) return if import.persisted? import.source = source(file_name) - import.raw_data = raw_data(file_path, import.source) + import.file.attach( + io: File.open(file_path), + filename: file_name, + content_type: mime_type(import.source) + ) import.save! - - Import::ProcessJob.perform_later(import.id) - end - - def find_or_initialize_import(user, file_name) - import_name = file_name.split('_')[1..].join('_') - - Import.find_or_initialize_by(user:, name: import_name) - end - - def set_import_attributes(import, file_path, file_name) - source = source(file_name) - - import.source = source - import.raw_data = raw_data(file_path, source) - - import.save! - - import.id end def source(file_name) @@ -89,16 +69,13 @@ class Imports::Watcher end end - def raw_data(file_path, source) - file = File.read(file_path) - + def mime_type(source) case source.to_sym - when :gpx - Hash.from_xml(file) + when :gpx then 'application/xml' when :json, :geojson, :google_phone_takeout, :google_records, :google_semantic_history - JSON.parse(file) + 'application/json' when :owntracks - OwnTracks::RecParser.new(file).call + 'application/octet-stream' else raise UnsupportedSourceError, "Unsupported source: #{source}" end diff --git a/app/services/photoprism/import_geodata.rb b/app/services/photoprism/import_geodata.rb index 2ad5fa6e..2d0e7a68 100644 --- a/app/services/photoprism/import_geodata.rb +++ b/app/services/photoprism/import_geodata.rb @@ -23,8 +23,13 @@ class Photoprism::ImportGeodata import = find_or_create_import(json_data) return create_import_failed_notification(import.name) unless import.new_record? - import.update!(raw_data: json_data) - Import::ProcessJob.perform_later(import.id) + import.file.attach( + io: StringIO.new(json_data.to_json), + filename: file_name(json_data), + content_type: 'application/json' + ) + + import.save! end def find_or_create_import(json_data) diff --git a/spec/jobs/import/process_job_spec.rb b/spec/jobs/import/process_job_spec.rb index d805ac9f..bd102947 100644 --- a/spec/jobs/import/process_job_spec.rb +++ b/spec/jobs/import/process_job_spec.rb @@ -7,7 +7,12 @@ RSpec.describe Import::ProcessJob, type: :job do subject(:perform) { described_class.new.perform(import.id) } let(:user) { create(:user) } - let!(:import) { create(:import, user:, name: 'owntracks_export.json') } + let!(:import) { create(:import, user:, name: '2024-03.rec') } + let(:file_path) { Rails.root.join('spec/fixtures/files/owntracks/2024-03.rec') } + + before do + import.file.attach(io: File.open(file_path), filename: '2024-03.rec', content_type: 'application/octet-stream') + end it 'creates points' do expect { perform }.to change { Point.count }.by(9) diff --git a/spec/services/imports/create_spec.rb b/spec/services/imports/create_spec.rb index 961d014c..02da61b9 100644 --- a/spec/services/imports/create_spec.rb +++ b/spec/services/imports/create_spec.rb @@ -9,6 +9,13 @@ RSpec.describe Imports::Create do describe '#call' do context 'when source is google_semantic_history' do let(:import) { create(:import, source: 'google_semantic_history') } + let(:file_path) { Rails.root.join('spec/fixtures/files/google/semantic_history.json') } + let(:file) { Rack::Test::UploadedFile.new(file_path, 'application/json') } + + before do + import.file.attach(io: File.open(file_path), filename: 'semantic_history.json', + content_type: 'application/json') + end it 'calls the GoogleMaps::SemanticHistoryParser' do expect(GoogleMaps::SemanticHistoryParser).to \ @@ -29,6 +36,12 @@ RSpec.describe Imports::Create do context 'when source is owntracks' do let(:import) { create(:import, source: 'owntracks') } + let(:file_path) { Rails.root.join('spec/fixtures/files/owntracks/2024-03.rec') } + let(:file) { Rack::Test::UploadedFile.new(file_path, 'application/octet-stream') } + + before do + import.file.attach(io: File.open(file_path), filename: '2024-03.rec', content_type: 'application/octet-stream') + end it 'calls the OwnTracks::Importer' do expect(OwnTracks::Importer).to \ @@ -50,7 +63,7 @@ RSpec.describe Imports::Create do end end - xit 'schedules visit suggesting' do + it 'schedules visit suggesting' do Sidekiq::Testing.inline! do expect { service.call }.to have_enqueued_job(VisitSuggestingJob) end @@ -72,6 +85,13 @@ RSpec.describe Imports::Create do context 'when source is gpx' do let(:import) { create(:import, source: 'gpx') } + let(:file_path) { Rails.root.join('spec/fixtures/files/gpx/gpx_track_single_segment.gpx') } + let(:file) { Rack::Test::UploadedFile.new(file_path, 'application/octet-stream') } + + before do + import.file.attach(io: File.open(file_path), filename: 'gpx_track_single_segment.gpx', + content_type: 'application/octet-stream') + end it 'calls the Gpx::TrackImporter' do expect(Gpx::TrackImporter).to \ From 477ef709c329b384c11145c28f906ff6be2fdb58 Mon Sep 17 00:00:00 2001 From: Eugene Burmakin Date: Sun, 23 Mar 2025 21:06:23 +0100 Subject: [PATCH 05/11] Update changelog --- CHANGELOG.md | 12 ++++++++++++ app/models/user.rb | 2 -- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e90314c1..35d088d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,18 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +# Unreleased + +## TODO: + +- Migrate existing imports from `raw_data` to the new file storage. +- Delete import files when import is deleted. +- Stream import files for parsing instead of downloading them. + +## Changed + +- Import files are now being attached to the import record instead of being stored in the `raw_data` database column. + # 0.25.3 - 2025-03-22 ## Fixed diff --git a/app/models/user.rb b/app/models/user.rb index 3a1c6071..44225162 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -15,8 +15,6 @@ class User < ApplicationRecord has_many :places, through: :visits has_many :trips, dependent: :destroy - has_many_attached :import_files - after_create :create_api_key after_create :import_sample_points after_commit :activate, on: :create, if: -> { DawarichSettings.self_hosted? } From 1e54d87d53166e07e2803525e1dcf3518cd29dcc Mon Sep 17 00:00:00 2001 From: Eugene Burmakin Date: Sun, 23 Mar 2025 22:00:41 +0100 Subject: [PATCH 06/11] Implement support for storing import files in S3. --- CHANGELOG.md | 2 +- Gemfile | 4 +++ Gemfile.lock | 27 ++++++++++++++----- app/models/import.rb | 1 + .../google_maps/semantic_history_parser.rb | 10 ++++--- config/environments/development.rb | 2 ++ config/initializers/aws.rb | 11 ++++++++ config/storage.yml | 13 +++++---- 8 files changed, 52 insertions(+), 18 deletions(-) create mode 100644 config/initializers/aws.rb diff --git a/CHANGELOG.md b/CHANGELOG.md index 35d088d0..68c1849a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ## TODO: - Migrate existing imports from `raw_data` to the new file storage. -- Delete import files when import is deleted. - Stream import files for parsing instead of downloading them. +- Add randomized name to the import files before attaching them to the import record. ## Changed diff --git a/Gemfile b/Gemfile index 564515c4..ed3e9e9d 100644 --- a/Gemfile +++ b/Gemfile @@ -5,6 +5,10 @@ git_source(:github) { |repo| "https://github.com/#{repo}.git" } ruby File.read('.ruby-version').strip +# https://meta.discourse.org/t/cant-rebuild-due-to-aws-sdk-gem-bump-and-new-aws-data-integrity-protections/354217/40 +gem 'aws-sdk-s3', '~> 1.177.0', require: false +gem 'aws-sdk-core', '~> 3.215.1', require: false +gem 'aws-sdk-kms', '~> 1.96.0', require: false gem 'bootsnap', require: false gem 'chartkick' gem 'data_migrate' diff --git a/Gemfile.lock b/Gemfile.lock index adff6149..35bfd90a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -79,6 +79,22 @@ GEM public_suffix (>= 2.0.2, < 7.0) ast (2.4.2) attr_extras (7.1.0) + aws-eventstream (1.3.2) + aws-partitions (1.1072.0) + aws-sdk-core (3.215.1) + aws-eventstream (~> 1, >= 1.3.0) + aws-partitions (~> 1, >= 1.992.0) + aws-sigv4 (~> 1.9) + jmespath (~> 1, >= 1.6.1) + aws-sdk-kms (1.96.0) + aws-sdk-core (~> 3, >= 3.210.0) + aws-sigv4 (~> 1.5) + aws-sdk-s3 (1.177.0) + aws-sdk-core (~> 3, >= 3.210.0) + aws-sdk-kms (~> 1) + aws-sigv4 (~> 1.5) + aws-sigv4 (1.11.0) + aws-eventstream (~> 1, >= 1.0.2) base64 (0.2.0) bcrypt (3.1.20) benchmark (0.4.0) @@ -91,7 +107,6 @@ GEM coderay (1.1.3) concurrent-ruby (1.3.5) connection_pool (2.5.0) - content_disposition (1.0.0) crack (1.0.0) bigdecimal rexml @@ -121,8 +136,6 @@ GEM dotenv-rails (3.1.7) dotenv (= 3.1.7) railties (>= 6.1) - down (5.4.2) - addressable (~> 2.8) drb (2.2.1) erubi (1.13.1) et-orbi (1.2.11) @@ -164,6 +177,7 @@ GEM pp (>= 0.6.0) rdoc (>= 4.0.0) reline (>= 0.4.2) + jmespath (1.6.2) json (2.10.1) json-schema (5.0.1) addressable (~> 2.8) @@ -371,9 +385,6 @@ GEM securerandom (0.4.1) shoulda-matchers (6.4.0) activesupport (>= 5.2.0) - shrine (3.6.0) - content_disposition (~> 1.0) - down (~> 5.1) sidekiq (7.3.9) base64 connection_pool (>= 2.3.0) @@ -453,6 +464,9 @@ PLATFORMS DEPENDENCIES activerecord-postgis-adapter + aws-sdk-core (~> 3.215.1) + aws-sdk-kms (~> 1.96.0) + aws-sdk-s3 (~> 1.177.0) bootsnap chartkick data_migrate @@ -488,7 +502,6 @@ DEPENDENCIES rswag-ui rubocop-rails shoulda-matchers - shrine (~> 3.6) sidekiq sidekiq-cron sidekiq-limit_fetch diff --git a/app/models/import.rb b/app/models/import.rb index 89a2387f..1e490e56 100644 --- a/app/models/import.rb +++ b/app/models/import.rb @@ -9,6 +9,7 @@ class Import < ApplicationRecord has_one_attached :file after_commit -> { Import::ProcessJob.perform_later(id) }, on: :create + after_commit -> { file.purge }, on: :destroy enum :source, { google_semantic_history: 0, owntracks: 1, google_records: 2, diff --git a/app/services/google_maps/semantic_history_parser.rb b/app/services/google_maps/semantic_history_parser.rb index 77984c09..e4c2f046 100644 --- a/app/services/google_maps/semantic_history_parser.rb +++ b/app/services/google_maps/semantic_history_parser.rb @@ -63,9 +63,13 @@ class GoogleMaps::SemanticHistoryParser end def parse_json - import.raw_data['timelineObjects'].flat_map do |timeline_object| - parse_timeline_object(timeline_object) - end.compact + import.file.download do |f| + json = Oj.load(f) + + json['timelineObjects'].flat_map do |timeline_object| + parse_timeline_object(timeline_object) + end.compact + end end def parse_timeline_object(timeline_object) diff --git a/config/environments/development.rb b/config/environments/development.rb index 29b9a038..3edfc64e 100644 --- a/config/environments/development.rb +++ b/config/environments/development.rb @@ -98,4 +98,6 @@ Rails.application.configure do config.logger = Logger.new($stdout) config.lograge.enabled = true config.lograge.formatter = Lograge::Formatters::Json.new + + config.active_storage.service = ENV['SELF_HOSTED'] == 'true' ? :local : :s3 end diff --git a/config/initializers/aws.rb b/config/initializers/aws.rb new file mode 100644 index 00000000..8be964d9 --- /dev/null +++ b/config/initializers/aws.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +require 'aws-sdk-core' + +Aws.config.update( + { + region: ENV['AWS_REGION'], + endpoint: ENV['AWS_ENDPOINT'], + credentials: Aws::Credentials.new(ENV['AWS_ACCESS_KEY_ID'], ENV['AWS_SECRET_ACCESS_KEY']) + } +) diff --git a/config/storage.yml b/config/storage.yml index 4942ab66..02112314 100644 --- a/config/storage.yml +++ b/config/storage.yml @@ -6,13 +6,12 @@ local: service: Disk root: <%= Rails.root.join("storage") %> -# Use bin/rails credentials:edit to set the AWS secrets (as aws:access_key_id|secret_access_key) -# amazon: -# service: S3 -# access_key_id: <%= Rails.application.credentials.dig(:aws, :access_key_id) %> -# secret_access_key: <%= Rails.application.credentials.dig(:aws, :secret_access_key) %> -# region: us-east-1 -# bucket: your_own_bucket-<%= Rails.env %> +s3: + service: S3 + access_key_id: <%= ENV.fetch("AWS_ACCESS_KEY_ID") %> + secret_access_key: <%= ENV.fetch("AWS_SECRET_ACCESS_KEY") %> + region: <%= ENV.fetch("AWS_REGION") %> + bucket: <%= ENV.fetch("AWS_BUCKET") %> # Remember not to checkin your GCS keyfile to a repository # google: From 26c7a4cca3d65011ac149c2cebf060b5e5a0dc89 Mon Sep 17 00:00:00 2001 From: Eugene Burmakin Date: Sun, 23 Mar 2025 22:57:30 +0100 Subject: [PATCH 07/11] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 68c1849a..7879bd3e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,10 +11,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - Migrate existing imports from `raw_data` to the new file storage. - Stream import files for parsing instead of downloading them. - Add randomized name to the import files before attaching them to the import record. +- Export files should also be stored in the S3-compatible storage and be available for download. ## Changed - Import files are now being attached to the import record instead of being stored in the `raw_data` database column. +- Import files can now be stored in S3-compatible storage. # 0.25.3 - 2025-03-22 From fc8d0d8ddc10e1791c1c1489286ae85cad7497af Mon Sep 17 00:00:00 2001 From: Eugene Burmakin Date: Mon, 24 Mar 2025 20:46:16 +0100 Subject: [PATCH 08/11] Fix failing tests and move testing imports to files --- CHANGELOG.md | 4 +- app/controllers/exports_controller.rb | 18 +- app/jobs/export_job.rb | 4 +- app/models/export.rb | 12 +- app/services/exports/create.rb | 42 ++--- .../google_maps/semantic_history_parser.rb | 12 +- app/views/exports/index.html.erb | 6 +- ...5_add_format_start_at_end_at_to_exports.rb | 9 + db/schema.rb | 5 +- spec/factories/exports.rb | 4 +- ...th_activitySegment_with_startLocation.json | 10 ++ ...egment_with_startLocation_timestampMs.json | 10 ++ ...tion_timestamp_in_milliseconds_format.json | 10 ++ ...tLocation_timestamp_in_seconds_format.json | 10 ++ ...with_startLocation_with_iso_timestamp.json | 10 ++ ...activitySegment_without_startLocation.json | 14 ++ ...ut_startLocation_without_waypointPath.json | 9 + ...eVisit_with_location_with_coordinates.json | 10 ++ ...n_with_coordinates_with_iso_timestamp.json | 10 ++ ...ordinates_with_milliseconds_timestamp.json | 10 ++ ...th_coordinates_with_seconds_timestamp.json | 10 ++ ...ion_with_coordinates_with_timestampMs.json | 10 ++ ...sit_without_location_with_coordinates.json | 10 ++ ...rdinates_with_otherCandidateLocations.json | 10 ++ spec/jobs/export_job_spec.rb | 4 +- spec/models/export_spec.rb | 1 + spec/requests/exports_spec.rb | 20 +-- spec/services/exports/create_spec.rb | 14 +- .../semantic_history_parser_spec.rb | 156 ++++-------------- 29 files changed, 260 insertions(+), 194 deletions(-) create mode 100644 db/migrate/20250324180755_add_format_start_at_end_at_to_exports.rb create mode 100644 spec/fixtures/files/google/location-history/with_activitySegment_with_startLocation.json create mode 100644 spec/fixtures/files/google/location-history/with_activitySegment_with_startLocation_timestampMs.json create mode 100644 spec/fixtures/files/google/location-history/with_activitySegment_with_startLocation_timestamp_in_milliseconds_format.json create mode 100644 spec/fixtures/files/google/location-history/with_activitySegment_with_startLocation_timestamp_in_seconds_format.json create mode 100644 spec/fixtures/files/google/location-history/with_activitySegment_with_startLocation_with_iso_timestamp.json create mode 100644 spec/fixtures/files/google/location-history/with_activitySegment_without_startLocation.json create mode 100644 spec/fixtures/files/google/location-history/with_activitySegment_without_startLocation_without_waypointPath.json create mode 100644 spec/fixtures/files/google/location-history/with_placeVisit_with_location_with_coordinates.json create mode 100644 spec/fixtures/files/google/location-history/with_placeVisit_with_location_with_coordinates_with_iso_timestamp.json create mode 100644 spec/fixtures/files/google/location-history/with_placeVisit_with_location_with_coordinates_with_milliseconds_timestamp.json create mode 100644 spec/fixtures/files/google/location-history/with_placeVisit_with_location_with_coordinates_with_seconds_timestamp.json create mode 100644 spec/fixtures/files/google/location-history/with_placeVisit_with_location_with_coordinates_with_timestampMs.json create mode 100644 spec/fixtures/files/google/location-history/with_placeVisit_without_location_with_coordinates.json create mode 100644 spec/fixtures/files/google/location-history/with_placeVisit_without_location_with_coordinates_with_otherCandidateLocations.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 7879bd3e..1fdabb84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,13 +10,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - Migrate existing imports from `raw_data` to the new file storage. - Stream import files for parsing instead of downloading them. -- Add randomized name to the import files before attaching them to the import record. -- Export files should also be stored in the S3-compatible storage and be available for download. ## Changed - Import files are now being attached to the import record instead of being stored in the `raw_data` database column. - Import files can now be stored in S3-compatible storage. +- Export files are now being attached to the export record instead of being stored in the file system. +- Export files can now be stored in S3-compatible storage. # 0.25.3 - 2025-03-22 diff --git a/app/controllers/exports_controller.rb b/app/controllers/exports_controller.rb index 34b239dc..e15ab66f 100644 --- a/app/controllers/exports_controller.rb +++ b/app/controllers/exports_controller.rb @@ -11,9 +11,13 @@ class ExportsController < ApplicationController def create export_name = "export_from_#{params[:start_at].to_date}_to_#{params[:end_at].to_date}.#{params[:file_format]}" - export = current_user.exports.create(name: export_name, status: :created) - - ExportJob.perform_later(export.id, params[:start_at], params[:end_at], file_format: params[:file_format]) + export = current_user.exports.create( + name: export_name, + status: :created, + format: params[:file_format], + start_at: params[:start_at], + end_at: params[:end_at] + ) redirect_to exports_url, notice: 'Export was successfully initiated. Please wait until it\'s finished.' rescue StandardError => e @@ -23,11 +27,7 @@ class ExportsController < ApplicationController end def destroy - ActiveRecord::Base.transaction do - @export.destroy - - File.delete(Rails.root.join('public', 'exports', @export.name)) - end + @export.destroy redirect_to exports_url, notice: 'Export was successfully destroyed.', status: :see_other end @@ -39,6 +39,6 @@ class ExportsController < ApplicationController end def export_params - params.require(:export).permit(:name, :url, :status) + params.require(:export).permit(:name, :url, :status, :format) end end diff --git a/app/jobs/export_job.rb b/app/jobs/export_job.rb index b8872c05..ea1ae819 100644 --- a/app/jobs/export_job.rb +++ b/app/jobs/export_job.rb @@ -3,9 +3,9 @@ class ExportJob < ApplicationJob queue_as :exports - def perform(export_id, start_at, end_at, file_format: :json) + def perform(export_id) export = Export.find(export_id) - Exports::Create.new(export:, start_at:, end_at:, file_format:).call + Exports::Create.new(export:).call end end diff --git a/app/models/export.rb b/app/models/export.rb index c9b4d071..8ba14bef 100644 --- a/app/models/export.rb +++ b/app/models/export.rb @@ -4,16 +4,16 @@ class Export < ApplicationRecord belongs_to :user enum :status, { created: 0, processing: 1, completed: 2, failed: 3 } + enum :format, { json: 0, gpx: 1 } validates :name, presence: true - before_destroy :delete_export_file + has_one_attached :file - private + after_commit -> { ExportJob.perform_later(id) }, on: :create + after_commit -> { file.purge }, on: :destroy - def delete_export_file - file_path = Rails.root.join('public', 'exports', "#{name}.json") - - File.delete(file_path) if File.exist?(file_path) + def process! + Exports::Create.new(export: self).call end end diff --git a/app/services/exports/create.rb b/app/services/exports/create.rb index 08181b4d..c6ebd82f 100644 --- a/app/services/exports/create.rb +++ b/app/services/exports/create.rb @@ -1,26 +1,28 @@ # frozen_string_literal: true class Exports::Create - def initialize(export:, start_at:, end_at:, file_format: :json) + def initialize(export:) @export = export @user = export.user - @start_at = start_at.to_datetime - @end_at = end_at.to_datetime - @file_format = file_format + @start_at = export.start_at + @end_at = export.end_at + @file_format = export.format end def call - export.update!(status: :processing) + ActiveRecord::Base.transaction do + export.update!(status: :processing) - points = time_framed_points + points = time_framed_points - data = points_data(points) + data = points_data(points) - create_export_file(data) + attach_export_file(data) - export.update!(status: :completed, url: "exports/#{export.name}") + export.update!(status: :completed) - create_export_finished_notification + create_export_finished_notification + end rescue StandardError => e create_failed_export_notification(e) @@ -72,18 +74,18 @@ class Exports::Create Points::GpxSerializer.new(points, export.name).call end - def create_export_file(data) - dir_path = Rails.root.join('public/exports') - - FileUtils.mkdir_p(dir_path) unless Dir.exist?(dir_path) - - file_path = dir_path.join(export.name) - - Rails.logger.info("Creating export file at: #{file_path}") - - File.open(file_path, 'w') { |file| file.write(data) } + def attach_export_file(data) + export.file.attach(io: StringIO.new(data.to_s), filename: export.name, content_type:) rescue StandardError => e Rails.logger.error("Failed to create export file: #{e.message}") raise end + + def content_type + case file_format.to_sym + when :json then 'application/json' + when :gpx then 'application/gpx+xml' + else raise ArgumentError, "Unsupported file format: #{file_format}" + end + end end diff --git a/app/services/google_maps/semantic_history_parser.rb b/app/services/google_maps/semantic_history_parser.rb index e4c2f046..dd5f23d0 100644 --- a/app/services/google_maps/semantic_history_parser.rb +++ b/app/services/google_maps/semantic_history_parser.rb @@ -13,9 +13,7 @@ class GoogleMaps::SemanticHistoryParser end def call - points_data = parse_json - - points_data.each_slice(BATCH_SIZE) do |batch| + parsed_json.each_slice(BATCH_SIZE) do |batch| @current_index += batch.size process_batch(batch) broadcast_import_progress(import, @current_index) @@ -62,14 +60,18 @@ class GoogleMaps::SemanticHistoryParser ) end - def parse_json + def parsed_json + data = nil + import.file.download do |f| json = Oj.load(f) - json['timelineObjects'].flat_map do |timeline_object| + data = json['timelineObjects'].flat_map do |timeline_object| parse_timeline_object(timeline_object) end.compact end + + data end def parse_timeline_object(timeline_object) diff --git a/app/views/exports/index.html.erb b/app/views/exports/index.html.erb index 33888a94..8f9fa865 100644 --- a/app/views/exports/index.html.erb +++ b/app/views/exports/index.html.erb @@ -41,7 +41,11 @@ <%= export.status %> <% if export.completed? %> - <%= link_to 'Download', export.url, class: "px-4 py-2 bg-blue-500 text-white rounded-md", download: export.name %> + <% if export.url.present? %> + <%= link_to 'Download', export.url, class: "px-4 py-2 bg-blue-500 text-white rounded-md", download: export.name %> + <% else %> + <%= link_to 'Download', export.file.url, class: "px-4 py-2 bg-blue-500 text-white rounded-md", download: export.name %> + <% end %> <% end %> <%= link_to 'Delete', export, data: { confirm: "Are you sure?", turbo_confirm: "Are you sure?", turbo_method: :delete }, method: :delete, class: "px-4 py-2 bg-red-500 text-white rounded-md" %> diff --git a/db/migrate/20250324180755_add_format_start_at_end_at_to_exports.rb b/db/migrate/20250324180755_add_format_start_at_end_at_to_exports.rb new file mode 100644 index 00000000..1f515199 --- /dev/null +++ b/db/migrate/20250324180755_add_format_start_at_end_at_to_exports.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +class AddFormatStartAtEndAtToExports < ActiveRecord::Migration[8.0] + def change + add_column :exports, :format, :integer, default: 0 + add_column :exports, :start_at, :datetime + add_column :exports, :end_at, :datetime + end +end diff --git a/db/schema.rb b/db/schema.rb index 562e417c..e0c2ca0e 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[8.0].define(version: 2025_03_03_194043) do +ActiveRecord::Schema[8.0].define(version: 2025_03_24_180755) do # These are extensions that must be enabled in order to support this database enable_extension "pg_catalog.plpgsql" enable_extension "postgis" @@ -74,6 +74,9 @@ ActiveRecord::Schema[8.0].define(version: 2025_03_03_194043) do t.bigint "user_id", null: false t.datetime "created_at", null: false t.datetime "updated_at", null: false + t.integer "format", default: 0 + t.datetime "start_at" + t.datetime "end_at" t.index ["status"], name: "index_exports_on_status" t.index ["user_id"], name: "index_exports_on_user_id" end diff --git a/spec/factories/exports.rb b/spec/factories/exports.rb index f8c97938..1eae0b47 100644 --- a/spec/factories/exports.rb +++ b/spec/factories/exports.rb @@ -3,8 +3,8 @@ FactoryBot.define do factory :export do name { 'export' } - url { 'exports/export.json' } - status { 1 } + status { :created } + format { :json } user end end diff --git a/spec/fixtures/files/google/location-history/with_activitySegment_with_startLocation.json b/spec/fixtures/files/google/location-history/with_activitySegment_with_startLocation.json new file mode 100644 index 00000000..d32f79b5 --- /dev/null +++ b/spec/fixtures/files/google/location-history/with_activitySegment_with_startLocation.json @@ -0,0 +1,10 @@ +{ + "timelineObjects": [ + { + "activitySegment": { + "startLocation": { "latitudeE7": 123422222, "longitudeE7": 123422222 }, + "duration": { "startTimestamp": "2025-03-24 20:07:24 +0100" } + } + } + ] +} diff --git a/spec/fixtures/files/google/location-history/with_activitySegment_with_startLocation_timestampMs.json b/spec/fixtures/files/google/location-history/with_activitySegment_with_startLocation_timestampMs.json new file mode 100644 index 00000000..337817f7 --- /dev/null +++ b/spec/fixtures/files/google/location-history/with_activitySegment_with_startLocation_timestampMs.json @@ -0,0 +1,10 @@ +{ + "timelineObjects": [ + { + "activitySegment": { + "startLocation": { "latitudeE7": 123466666, "longitudeE7": 123466666 }, + "duration": { "startTimestampMs": "1742844302585" } + } + } + ] +} diff --git a/spec/fixtures/files/google/location-history/with_activitySegment_with_startLocation_timestamp_in_milliseconds_format.json b/spec/fixtures/files/google/location-history/with_activitySegment_with_startLocation_timestamp_in_milliseconds_format.json new file mode 100644 index 00000000..9aba1d8d --- /dev/null +++ b/spec/fixtures/files/google/location-history/with_activitySegment_with_startLocation_timestamp_in_milliseconds_format.json @@ -0,0 +1,10 @@ +{ + "timelineObjects": [ + { + "activitySegment": { + "startLocation": { "latitudeE7": 123455555, "longitudeE7": 123455555 }, + "duration": { "startTimestamp": "1742844232" } + } + } + ] +} diff --git a/spec/fixtures/files/google/location-history/with_activitySegment_with_startLocation_timestamp_in_seconds_format.json b/spec/fixtures/files/google/location-history/with_activitySegment_with_startLocation_timestamp_in_seconds_format.json new file mode 100644 index 00000000..592ae45c --- /dev/null +++ b/spec/fixtures/files/google/location-history/with_activitySegment_with_startLocation_timestamp_in_seconds_format.json @@ -0,0 +1,10 @@ +{ + "timelineObjects": [ + { + "activitySegment": { + "startLocation": { "latitudeE7": 123444444, "longitudeE7": 123444444 }, + "duration": { "startTimestamp": "1742844302585" } + } + } + ] +} diff --git a/spec/fixtures/files/google/location-history/with_activitySegment_with_startLocation_with_iso_timestamp.json b/spec/fixtures/files/google/location-history/with_activitySegment_with_startLocation_with_iso_timestamp.json new file mode 100644 index 00000000..7fdd712e --- /dev/null +++ b/spec/fixtures/files/google/location-history/with_activitySegment_with_startLocation_with_iso_timestamp.json @@ -0,0 +1,10 @@ +{ + "timelineObjects": [ + { + "activitySegment": { + "startLocation": { "latitudeE7": 123433333, "longitudeE7": 123433333 }, + "duration": { "startTimestamp": "2025-03-24T20:20:23+01:00" } + } + } + ] +} diff --git a/spec/fixtures/files/google/location-history/with_activitySegment_without_startLocation.json b/spec/fixtures/files/google/location-history/with_activitySegment_without_startLocation.json new file mode 100644 index 00000000..c5d6b3df --- /dev/null +++ b/spec/fixtures/files/google/location-history/with_activitySegment_without_startLocation.json @@ -0,0 +1,14 @@ +{ + "timelineObjects": [ + { + "activitySegment": { + "waypointPath": { + "waypoints": [ + { "latE7": 123411111, "lngE7": 123411111 } + ] + }, + "duration": { "startTimestamp": "2025-03-24 20:07:24 +0100" } + } + } + ] +} diff --git a/spec/fixtures/files/google/location-history/with_activitySegment_without_startLocation_without_waypointPath.json b/spec/fixtures/files/google/location-history/with_activitySegment_without_startLocation_without_waypointPath.json new file mode 100644 index 00000000..f8ae2840 --- /dev/null +++ b/spec/fixtures/files/google/location-history/with_activitySegment_without_startLocation_without_waypointPath.json @@ -0,0 +1,9 @@ +{ + "timelineObjects": [ + { + "activitySegment": { + "duration": { "startTimestamp": "2025-03-24 20:07:24 +0100" } + } + } + ] +} diff --git a/spec/fixtures/files/google/location-history/with_placeVisit_with_location_with_coordinates.json b/spec/fixtures/files/google/location-history/with_placeVisit_with_location_with_coordinates.json new file mode 100644 index 00000000..955d587b --- /dev/null +++ b/spec/fixtures/files/google/location-history/with_placeVisit_with_location_with_coordinates.json @@ -0,0 +1,10 @@ +{ + "timelineObjects": [ + { + "placeVisit": { + "location": { "latitudeE7": 123477777, "longitudeE7": 123477777 }, + "duration": { "startTimestamp": "1742844232" } + } + } + ] +} diff --git a/spec/fixtures/files/google/location-history/with_placeVisit_with_location_with_coordinates_with_iso_timestamp.json b/spec/fixtures/files/google/location-history/with_placeVisit_with_location_with_coordinates_with_iso_timestamp.json new file mode 100644 index 00000000..e0d6678b --- /dev/null +++ b/spec/fixtures/files/google/location-history/with_placeVisit_with_location_with_coordinates_with_iso_timestamp.json @@ -0,0 +1,10 @@ +{ + "timelineObjects": [ + { + "placeVisit": { + "location": { "latitudeE7": 123488888, "longitudeE7": 123488888 }, + "duration": { "startTimestamp": "2025-03-24T20:25:02+01:00" } + } + } + ] +} diff --git a/spec/fixtures/files/google/location-history/with_placeVisit_with_location_with_coordinates_with_milliseconds_timestamp.json b/spec/fixtures/files/google/location-history/with_placeVisit_with_location_with_coordinates_with_milliseconds_timestamp.json new file mode 100644 index 00000000..8b25ca80 --- /dev/null +++ b/spec/fixtures/files/google/location-history/with_placeVisit_with_location_with_coordinates_with_milliseconds_timestamp.json @@ -0,0 +1,10 @@ +{ + "timelineObjects": [ + { + "placeVisit": { + "location": { "latitudeE7": 123511111, "longitudeE7": 123511111 }, + "duration": { "startTimestamp": "1742844302585" } + } + } + ] +} diff --git a/spec/fixtures/files/google/location-history/with_placeVisit_with_location_with_coordinates_with_seconds_timestamp.json b/spec/fixtures/files/google/location-history/with_placeVisit_with_location_with_coordinates_with_seconds_timestamp.json new file mode 100644 index 00000000..a3301a03 --- /dev/null +++ b/spec/fixtures/files/google/location-history/with_placeVisit_with_location_with_coordinates_with_seconds_timestamp.json @@ -0,0 +1,10 @@ +{ + "timelineObjects": [ + { + "placeVisit": { + "location": { "latitudeE7": 123499999, "longitudeE7": 123499999 }, + "duration": { "startTimestamp": "1742844302" } + } + } + ] +} diff --git a/spec/fixtures/files/google/location-history/with_placeVisit_with_location_with_coordinates_with_timestampMs.json b/spec/fixtures/files/google/location-history/with_placeVisit_with_location_with_coordinates_with_timestampMs.json new file mode 100644 index 00000000..139399e4 --- /dev/null +++ b/spec/fixtures/files/google/location-history/with_placeVisit_with_location_with_coordinates_with_timestampMs.json @@ -0,0 +1,10 @@ +{ + "timelineObjects": [ + { + "placeVisit": { + "location": { "latitudeE7": 123522222, "longitudeE7": 123522222 }, + "duration": { "startTimestampMs": "1742844302585" } + } + } + ] +} diff --git a/spec/fixtures/files/google/location-history/with_placeVisit_without_location_with_coordinates.json b/spec/fixtures/files/google/location-history/with_placeVisit_without_location_with_coordinates.json new file mode 100644 index 00000000..eb245c7b --- /dev/null +++ b/spec/fixtures/files/google/location-history/with_placeVisit_without_location_with_coordinates.json @@ -0,0 +1,10 @@ +{ + "timelineObjects": [ + { + "placeVisit": { + "location": {}, + "duration": { "startTimestamp": "2025-03-24 20:25:02 +0100" } + } + } + ] +} diff --git a/spec/fixtures/files/google/location-history/with_placeVisit_without_location_with_coordinates_with_otherCandidateLocations.json b/spec/fixtures/files/google/location-history/with_placeVisit_without_location_with_coordinates_with_otherCandidateLocations.json new file mode 100644 index 00000000..f705b3ad --- /dev/null +++ b/spec/fixtures/files/google/location-history/with_placeVisit_without_location_with_coordinates_with_otherCandidateLocations.json @@ -0,0 +1,10 @@ +{ + "timelineObjects": [ + { + "placeVisit": { + "otherCandidateLocations": [{ "latitudeE7": 123533333, "longitudeE7": 123533333 }], + "duration": { "startTimestamp": "2025-03-24 20:25:02 +0100" } + } + } + ] +} diff --git a/spec/jobs/export_job_spec.rb b/spec/jobs/export_job_spec.rb index fe4261ae..b2fcfa29 100644 --- a/spec/jobs/export_job_spec.rb +++ b/spec/jobs/export_job_spec.rb @@ -8,8 +8,8 @@ RSpec.describe ExportJob, type: :job do let(:end_at) { Time.zone.now } it 'calls the Exports::Create service class' do - expect(Exports::Create).to receive(:new).with(export:, start_at:, end_at:, file_format: :json).and_call_original + expect(Exports::Create).to receive(:new).with(export:).and_call_original - described_class.perform_now(export.id, start_at, end_at) + described_class.perform_now(export.id) end end diff --git a/spec/models/export_spec.rb b/spec/models/export_spec.rb index baf46a92..bb15eb15 100644 --- a/spec/models/export_spec.rb +++ b/spec/models/export_spec.rb @@ -9,5 +9,6 @@ RSpec.describe Export, type: :model do describe 'enums' do it { is_expected.to define_enum_for(:status).with_values(created: 0, processing: 1, completed: 2, failed: 3) } + it { is_expected.to define_enum_for(:format).with_values(json: 0, gpx: 1) } end end diff --git a/spec/requests/exports_spec.rb b/spec/requests/exports_spec.rb index 2c5a6b72..89658348 100644 --- a/spec/requests/exports_spec.rb +++ b/spec/requests/exports_spec.rb @@ -76,25 +76,9 @@ RSpec.describe '/exports', type: :request do end describe 'DELETE /destroy' do - let!(:export) { create(:export, user:, url: 'exports/export.json', name: 'export.json') } - let(:export_file) { Rails.root.join('public', 'exports', export.name) } + let!(:export) { create(:export, user:, name: 'export.json') } - before do - sign_in user - - FileUtils.mkdir_p(File.dirname(export_file)) - File.write(export_file, '{"some": "data"}') - end - - after { FileUtils.rm_f(export_file) } - - it 'removes the export file from disk' do - expect(File.exist?(export_file)).to be true - - delete export_url(export) - - expect(File.exist?(export_file)).to be false - end + before { sign_in user } it 'destroys the requested export' do expect { delete export_url(export) }.to change(Export, :count).by(-1) diff --git a/spec/services/exports/create_spec.rb b/spec/services/exports/create_spec.rb index 1bea40d2..e8ecb08a 100644 --- a/spec/services/exports/create_spec.rb +++ b/spec/services/exports/create_spec.rb @@ -4,15 +4,17 @@ require 'rails_helper' RSpec.describe Exports::Create do describe '#call' do - subject(:create_export) { described_class.new(export:, start_at:, end_at:, file_format:).call } + subject(:create_export) { described_class.new(export:).call } let(:file_format) { :json } let(:user) { create(:user) } let(:start_at) { DateTime.new(2021, 1, 1).to_s } let(:end_at) { DateTime.new(2021, 1, 2).to_s } let(:export_name) { "#{start_at.to_date}_#{end_at.to_date}.#{file_format}" } - let(:export) { create(:export, user:, name: export_name, status: :created) } - let(:export_content) { Points::GeojsonSerializer.new(points).call } + let(:export) do + create(:export, user:, name: export_name, status: :created, format: file_format, start_at:, end_at:) + end + let(:export_content) { Points::GeojsonSerializer.new(points).call } let(:reverse_geocoded_at) { Time.zone.local(2021, 1, 1) } let!(:points) do 10.times.map do |i| @@ -35,10 +37,10 @@ RSpec.describe Exports::Create do expect(File.read(file_path).strip).to eq(export_content) end - it 'sets the export url' do + it 'sets the export file' do create_export - expect(export.reload.url).to eq("exports/#{export.name}") + expect(export.reload.file.attached?).to be_truthy end it 'updates the export status to completed' do @@ -53,7 +55,7 @@ RSpec.describe Exports::Create do context 'when an error occurs' do before do - allow(File).to receive(:open).and_raise(StandardError) + allow_any_instance_of(Points::GeojsonSerializer).to receive(:call).and_raise(StandardError) end it 'updates the export status to failed' do diff --git a/spec/services/google_maps/semantic_history_parser_spec.rb b/spec/services/google_maps/semantic_history_parser_spec.rb index 9859c930..336df99c 100644 --- a/spec/services/google_maps/semantic_history_parser_spec.rb +++ b/spec/services/google_maps/semantic_history_parser_spec.rb @@ -7,36 +7,28 @@ RSpec.describe GoogleMaps::SemanticHistoryParser do subject(:parser) { described_class.new(import, user.id).call } let(:user) { create(:user) } - let(:time) { Time.zone.now } + let!(:import) { create(:import, user:) } + let(:file_path) { Rails.root.join("spec/fixtures/files/google/location-history/#{file_name}.json") } + + before do + import.file.attach( + io: File.open(file_path), + filename: 'semantic_history.json', + content_type: 'application/json' + ) + end context 'when activitySegment is present' do context 'when startLocation is blank' do - let(:import) { create(:import, raw_data: { 'timelineObjects' => [activity_segment] }) } - let(:activity_segment) do - { - 'activitySegment' => { - 'waypointPath' => { - 'waypoints' => [ - { 'latE7' => 123_456_789, 'lngE7' => 123_456_789 } - ] - }, - 'duration' => { 'startTimestamp' => time.to_s } - } - } - end + let(:file_name) { 'with_activitySegment_without_startLocation' } it 'creates a point' do expect { parser }.to change(Point, :count).by(1) + expect(Point.last.lonlat.to_s).to eq('POINT (12.3411111 12.3411111)') end context 'when waypointPath is blank' do - let(:activity_segment) do - { - 'activitySegment' => { - 'duration' => { 'startTimestamp' => time.to_s } - } - } - end + let(:file_name) { 'with_activitySegment_without_startLocation_without_waypointPath' } it 'does not create a point' do expect { parser }.not_to change(Point, :count) @@ -45,78 +37,47 @@ RSpec.describe GoogleMaps::SemanticHistoryParser do end context 'when startLocation is present' do - let(:import) { create(:import, raw_data: { 'timelineObjects' => [activity_segment] }) } - let(:activity_segment) do - { - 'activitySegment' => { - 'startLocation' => { 'latitudeE7' => 123_456_789, 'longitudeE7' => 123_456_789 }, - 'duration' => { 'startTimestamp' => time.to_s } - } - } - end + let(:file_name) { 'with_activitySegment_with_startLocation' } it 'creates a point' do expect { parser }.to change(Point, :count).by(1) + expect(Point.last.lonlat.to_s).to eq('POINT (12.3422222 12.3422222)') end context 'with different timestamp formats' do context 'when timestamp is in ISO format' do - let(:activity_segment) do - { - 'activitySegment' => { - 'startLocation' => { 'latitudeE7' => 123_456_789, 'longitudeE7' => 123_456_789 }, - 'duration' => { 'startTimestamp' => time.iso8601 } - } - } - end + let(:file_name) { 'with_activitySegment_with_startLocation_with_iso_timestamp' } it 'creates a point' do expect { parser }.to change(Point, :count).by(1) + expect(Point.last.lonlat.to_s).to eq('POINT (12.3433333 12.3433333)') end end context 'when timestamp is in seconds format' do - let(:activity_segment) do - { - 'activitySegment' => { - 'startLocation' => { 'latitudeE7' => 123_456_789, 'longitudeE7' => 123_456_789 }, - 'duration' => { 'startTimestamp' => time.to_i.to_s } - } - } - end + let(:file_name) { 'with_activitySegment_with_startLocation_timestamp_in_seconds_format' } it 'creates a point' do expect { parser }.to change(Point, :count).by(1) + expect(Point.last.lonlat.to_s).to eq('POINT (12.3444444 12.3444444)') end end context 'when timestamp is in milliseconds format' do - let(:activity_segment) do - { - 'activitySegment' => { - 'startLocation' => { 'latitudeE7' => 123_456_789, 'longitudeE7' => 123_456_789 }, - 'duration' => { 'startTimestamp' => (time.to_f * 1000).to_i.to_s } - } - } - end + let(:file_name) { 'with_activitySegment_with_startLocation_timestamp_in_milliseconds_format' } it 'creates a point' do expect { parser }.to change(Point, :count).by(1) + expect(Point.last.lonlat.to_s).to eq('POINT (12.3455555 12.3455555)') end end context 'when timestampMs is used' do - let(:activity_segment) do - { - 'activitySegment' => { - 'startLocation' => { 'latitudeE7' => 123_456_789, 'longitudeE7' => 123_456_789 }, - 'duration' => { 'timestampMs' => (time.to_f * 1000).to_i.to_s } - } - } - end + let(:file_name) { 'with_activitySegment_with_startLocation_timestampMs' } it 'creates a point' do expect { parser }.to change(Point, :count).by(1) + expect(Point.last.lonlat.to_s).to eq('POINT (12.3466666 12.3466666)') end end end @@ -125,110 +86,65 @@ RSpec.describe GoogleMaps::SemanticHistoryParser do context 'when placeVisit is present' do context 'when location with coordinates is present' do - let(:import) { create(:import, raw_data: { 'timelineObjects' => [place_visit] }) } - let(:place_visit) do - { - 'placeVisit' => { - 'location' => { 'latitudeE7' => 123_456_789, 'longitudeE7' => 123_456_789 }, - 'duration' => { 'startTimestamp' => time.to_s } - } - } - end + let(:file_name) { 'with_placeVisit_with_location_with_coordinates' } it 'creates a point' do expect { parser }.to change(Point, :count).by(1) + expect(Point.last.lonlat.to_s).to eq('POINT (12.3477777 12.3477777)') end context 'with different timestamp formats' do context 'when timestamp is in ISO format' do - let(:place_visit) do - { - 'placeVisit' => { - 'location' => { 'latitudeE7' => 123_456_789, 'longitudeE7' => 123_456_789 }, - 'duration' => { 'startTimestamp' => time.iso8601 } - } - } - end + let(:file_name) { 'with_placeVisit_with_location_with_coordinates_with_iso_timestamp' } it 'creates a point' do expect { parser }.to change(Point, :count).by(1) + expect(Point.last.lonlat.to_s).to eq('POINT (12.3488888 12.3488888)') end end context 'when timestamp is in seconds format' do - let(:place_visit) do - { - 'placeVisit' => { - 'location' => { 'latitudeE7' => 123_456_789, 'longitudeE7' => 123_456_789 }, - 'duration' => { 'startTimestamp' => time.to_i.to_s } - } - } - end + let(:file_name) { 'with_placeVisit_with_location_with_coordinates_with_seconds_timestamp' } it 'creates a point' do expect { parser }.to change(Point, :count).by(1) + expect(Point.last.lonlat.to_s).to eq('POINT (12.3499999 12.3499999)') end end context 'when timestamp is in milliseconds format' do - let(:place_visit) do - { - 'placeVisit' => { - 'location' => { 'latitudeE7' => 123_456_789, 'longitudeE7' => 123_456_789 }, - 'duration' => { 'startTimestamp' => (time.to_f * 1000).to_i.to_s } - } - } - end + let(:file_name) { 'with_placeVisit_with_location_with_coordinates_with_milliseconds_timestamp' } it 'creates a point' do expect { parser }.to change(Point, :count).by(1) + expect(Point.last.lonlat.to_s).to eq('POINT (12.3511111 12.3511111)') end end context 'when timestampMs is used' do - let(:place_visit) do - { - 'placeVisit' => { - 'location' => { 'latitudeE7' => 123_456_789, 'longitudeE7' => 123_456_789 }, - 'duration' => { 'timestampMs' => (time.to_f * 1000).to_i.to_s } - } - } - end + let(:file_name) { 'with_placeVisit_with_location_with_coordinates_with_timestampMs' } it 'creates a point' do expect { parser }.to change(Point, :count).by(1) + expect(Point.last.lonlat.to_s).to eq('POINT (12.3522222 12.3522222)') end end end end context 'when location with coordinates is blank' do - let(:import) { create(:import, raw_data: { 'timelineObjects' => [place_visit] }) } - let(:place_visit) do - { - 'placeVisit' => { - 'location' => {}, - 'duration' => { 'startTimestamp' => time.to_s } - } - } - end + let(:file_name) { 'with_placeVisit_without_location_with_coordinates' } it 'does not create a point' do expect { parser }.not_to change(Point, :count) end context 'when otherCandidateLocations is present' do - let(:place_visit) do - { - 'placeVisit' => { - 'otherCandidateLocations' => [{ 'latitudeE7' => 123_456_789, 'longitudeE7' => 123_456_789 }], - 'duration' => { 'startTimestamp' => time.to_s } - } - } - end + let(:file_name) { 'with_placeVisit_without_location_with_coordinates_with_otherCandidateLocations' } it 'creates a point' do expect { parser }.to change(Point, :count).by(1) + expect(Point.last.lonlat.to_s).to eq('POINT (12.3533333 12.3533333)') end end end From 41380ddf7cd0b4fc78bbdba13e11714d74fbe0b9 Mon Sep 17 00:00:00 2001 From: Eugene Burmakin Date: Mon, 24 Mar 2025 20:58:43 +0100 Subject: [PATCH 09/11] Move some beds --- app/controllers/exports_controller.rb | 4 ---- app/models/user.rb | 2 ++ app/services/exports/create.rb | 8 ++++---- app/services/google_maps/semantic_history_parser.rb | 4 ++-- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/app/controllers/exports_controller.rb b/app/controllers/exports_controller.rb index e15ab66f..b557dc9a 100644 --- a/app/controllers/exports_controller.rb +++ b/app/controllers/exports_controller.rb @@ -37,8 +37,4 @@ class ExportsController < ApplicationController def set_export @export = current_user.exports.find(params[:id]) end - - def export_params - params.require(:export).permit(:name, :url, :status, :format) - end end diff --git a/app/models/user.rb b/app/models/user.rb index 44225162..69fecdf5 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -118,6 +118,7 @@ class User < ApplicationRecord settings.try(:[], 'maps')&.try(:[], 'url')&.strip! end + # rubocop:disable Metrics/MethodLength def import_sample_points return unless Rails.env.development? || Rails.env.production? || @@ -134,4 +135,5 @@ class User < ApplicationRecord ) ) end + # rubocop:enable Metrics/MethodLength end diff --git a/app/services/exports/create.rb b/app/services/exports/create.rb index c6ebd82f..590173ad 100644 --- a/app/services/exports/create.rb +++ b/app/services/exports/create.rb @@ -21,10 +21,10 @@ class Exports::Create export.update!(status: :completed) - create_export_finished_notification + notify_export_finished end rescue StandardError => e - create_failed_export_notification(e) + notify_export_failed(e) export.update!(status: :failed) end @@ -40,7 +40,7 @@ class Exports::Create .order(timestamp: :asc) end - def create_export_finished_notification + def notify_export_finished Notifications::Create.new( user:, kind: :info, @@ -49,7 +49,7 @@ class Exports::Create ).call end - def create_failed_export_notification(error) + def notify_export_failed(error) Notifications::Create.new( user:, kind: :error, diff --git a/app/services/google_maps/semantic_history_parser.rb b/app/services/google_maps/semantic_history_parser.rb index dd5f23d0..b8d38c5d 100644 --- a/app/services/google_maps/semantic_history_parser.rb +++ b/app/services/google_maps/semantic_history_parser.rb @@ -13,7 +13,7 @@ class GoogleMaps::SemanticHistoryParser end def call - parsed_json.each_slice(BATCH_SIZE) do |batch| + points_data.each_slice(BATCH_SIZE) do |batch| @current_index += batch.size process_batch(batch) broadcast_import_progress(import, @current_index) @@ -60,7 +60,7 @@ class GoogleMaps::SemanticHistoryParser ) end - def parsed_json + def points_data data = nil import.file.download do |f| From faf07f662e156fc220fcbb41ef885f8760dcf295 Mon Sep 17 00:00:00 2001 From: Eugene Burmakin Date: Mon, 24 Mar 2025 21:30:01 +0100 Subject: [PATCH 10/11] Add condition to load S3 config only if not in test environment --- config/storage.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/config/storage.yml b/config/storage.yml index 02112314..40c62666 100644 --- a/config/storage.yml +++ b/config/storage.yml @@ -6,12 +6,15 @@ local: service: Disk root: <%= Rails.root.join("storage") %> +# Only load S3 config if not in test environment +<% unless Rails.env.test? %> s3: service: S3 access_key_id: <%= ENV.fetch("AWS_ACCESS_KEY_ID") %> secret_access_key: <%= ENV.fetch("AWS_SECRET_ACCESS_KEY") %> region: <%= ENV.fetch("AWS_REGION") %> bucket: <%= ENV.fetch("AWS_BUCKET") %> +<% end %> # Remember not to checkin your GCS keyfile to a repository # google: From 652a51281b3fbceffd83b95f8f5597fb774889f1 Mon Sep 17 00:00:00 2001 From: Eugene Burmakin Date: Wed, 2 Apr 2025 20:58:35 +0200 Subject: [PATCH 11/11] Add an optional task to migrate existing imports to the new storage. --- CHANGELOG.md | 11 +++++++---- app/models/import.rb | 8 ++++++++ config/credentials/production.key | 1 + lib/tasks/import.rake | 3 +-- lib/tasks/imports.rake | 13 +++++++++++++ spec/models/import_spec.rb | 19 +++++++++++++++++++ 6 files changed, 49 insertions(+), 6 deletions(-) create mode 100644 config/credentials/production.key create mode 100644 lib/tasks/imports.rake diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fdabb84..f951e4a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,12 +4,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). -# Unreleased +# 0.25.4 - 2025-04-02 -## TODO: +In this release we're changing the way import files are being stored. Previously, they were being stored in the `raw_data` column of the `imports` table. Now, they are being attached to the import record. All new imports will be using the new storage, to migrate existing imports, you can use the `rake imports:migrate_to_new_storage` task. Run it in the container shell. -- Migrate existing imports from `raw_data` to the new file storage. -- Stream import files for parsing instead of downloading them. +This is an optional task, that will not affect your points or other data. +Big imports might take a while to migrate, so be patient. + +If your hardware doesn't have enough memory to migrate the imports, you can delete your imports and re-import them. ## Changed @@ -18,6 +20,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - Export files are now being attached to the export record instead of being stored in the file system. - Export files can now be stored in S3-compatible storage. + # 0.25.3 - 2025-03-22 ## Fixed diff --git a/app/models/import.rb b/app/models/import.rb index 1e490e56..3965c219 100644 --- a/app/models/import.rb +++ b/app/models/import.rb @@ -30,4 +30,12 @@ class Import < ApplicationRecord [time.year, time.month] end.uniq end + + def migrate_to_new_storage + return if file.attached? + + raw_file = File.new(raw_data) + + file.attach(io: raw_file, filename: name, content_type: 'application/json') + end end diff --git a/config/credentials/production.key b/config/credentials/production.key new file mode 100644 index 00000000..4c969005 --- /dev/null +++ b/config/credentials/production.key @@ -0,0 +1 @@ +41976cfff86107bc1bb52cec7d8107b0 \ No newline at end of file diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake index 72f7d1ba..14f30548 100644 --- a/lib/tasks/import.rake +++ b/lib/tasks/import.rake @@ -1,8 +1,7 @@ # frozen_string_literal: true -# Usage: rake import:big_file['/path/to/file.json','user@email.com'] - namespace :import do + # Usage: rake import:big_file['/path/to/file.json','user@email.com'] desc 'Accepts a file path and user email and imports the data into the database' task :big_file, %i[file_path user_email] => :environment do |_, args| diff --git a/lib/tasks/imports.rake b/lib/tasks/imports.rake new file mode 100644 index 00000000..ffd49fbe --- /dev/null +++ b/lib/tasks/imports.rake @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +namespace :imports do + desc 'Migrate existing imports from `raw_data` to the new file storage' + + task migrate_to_new_storage: :environment do + Import.find_each do |import| + import.migrate_to_new_storage + rescue StandardError => e + puts "Error migrating import #{import.id}: #{e.message}" + end + end +end diff --git a/spec/models/import_spec.rb b/spec/models/import_spec.rb index 8b682409..07844e33 100644 --- a/spec/models/import_spec.rb +++ b/spec/models/import_spec.rb @@ -36,4 +36,23 @@ RSpec.describe Import, type: :model do expect(import.years_and_months_tracked).to eq([[2024, 11]]) end end + + describe '#migrate_to_new_storage' do + let(:raw_data) { Rails.root.join('spec/fixtures/files/geojson/export.json') } + let(:import) { create(:import, source: 'geojson', raw_data:) } + + it 'attaches the file to the import' do + import.migrate_to_new_storage + + expect(import.file.attached?).to be_truthy + end + + context 'when file is attached' do + it 'is a importable file' do + import.migrate_to_new_storage + + expect { import.process! }.to change(Point, :count).by(10) + end + end + end end