Implement Records.json import via the UI

This commit is contained in:
Eugene Burmakin 2025-04-03 18:41:05 +02:00
parent 2f9cacba61
commit 73edb35bb1
32 changed files with 436 additions and 66 deletions

View file

@ -7,6 +7,40 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
# 0.25.4 - 2025-04-02
⚠️ This release includes a breaking change. ⚠️
Make sure to add `dawarich_storage` volume to your `docker-compose.yml` file. Example:
```diff
...
dawarich_app:
image: freikin/dawarich:latest
container_name: dawarich_app
volumes:
- dawarich_public:/var/app/public
- dawarich_watched:/var/app/tmp/imports/watched
+ - dawarich_storage:/var/app/storage
...
dawarich_sidekiq:
image: freikin/dawarich:latest
container_name: dawarich_sidekiq
volumes:
- dawarich_public:/var/app/public
- dawarich_watched:/var/app/tmp/imports/watched
+ - dawarich_storage:/var/app/storage
volumes:
dawarich_db_data:
dawarich_shared:
dawarich_public:
dawarich_watched:
+ dawarich_storage:
```
In this release we're changing the way import files are stored. Previously, they were stored in the `raw_data` column of the `imports` table. Now, they are attached to the import record. All new imports will use the new storage. To migrate existing imports, run the `rake imports:migrate_to_new_storage` task in the container shell.
This is an optional task that will not affect your points or other data.
@ -14,17 +48,25 @@ Big imports might take a while to migrate, so be patient.
If your hardware doesn't have enough memory to migrate the imports, you can delete your imports and re-import them.
## Added
- Sentry can now be used for error tracking.
## Changed
- Import files are now being attached to the import record instead of being stored in the `raw_data` database column.
- Import files can now be stored in S3-compatible storage.
- Export files are now being attached to the export record instead of being stored in the file system.
- Export files can now be stored in S3-compatible storage.
- Users can now import Google's Records.json file via the UI instead of using the CLI.
- Optional telemetry sending is now disabled and will be removed in the future.
## Fixed
- Moving points on the map now works correctly. #957
- `rake points:migrate_to_lonlat` task now also reindexes the points table.
- Fixed filling `lonlat` column for old places after reverse geocoding.
- Deleting an import now correctly recalculates stats.
# 0.25.3 - 2025-03-22

View file

@ -31,6 +31,8 @@ gem 'rgeo'
gem 'rgeo-activerecord'
gem 'rswag-api'
gem 'rswag-ui'
gem 'sentry-ruby'
gem 'sentry-rails'
gem 'sidekiq'
gem 'sidekiq-cron'
gem 'sidekiq-limit_fetch'

View file

@ -383,6 +383,12 @@ GEM
rubocop-ast (>= 1.38.0, < 2.0)
ruby-progressbar (1.13.0)
securerandom (0.4.1)
sentry-rails (5.23.0)
railties (>= 5.0)
sentry-ruby (~> 5.23.0)
sentry-ruby (5.23.0)
bigdecimal
concurrent-ruby (~> 1.0, >= 1.0.2)
shoulda-matchers (6.4.0)
activesupport (>= 5.2.0)
sidekiq (7.3.9)
@ -501,6 +507,8 @@ DEPENDENCIES
rswag-specs
rswag-ui
rubocop-rails
sentry-rails
sentry-ruby
shoulda-matchers
sidekiq
sidekiq-cron

View file

@ -1,6 +1,8 @@
# frozen_string_literal: true
class ExportsController < ApplicationController
include ActiveStorage::SetCurrent
before_action :authenticate_user!
before_action :set_export, only: %i[destroy]

View file

@ -1,6 +1,8 @@
# frozen_string_literal: true
class ImportsController < ApplicationController
include ActiveStorage::SetCurrent
before_action :authenticate_user!
before_action :authenticate_active_user!, only: %i[new create]
before_action :set_import, only: %i[show destroy]
@ -9,7 +11,7 @@ class ImportsController < ApplicationController
@imports =
current_user
.imports
.select(:id, :name, :source, :created_at, :points_count)
.select(:id, :name, :source, :created_at, :processed)
.order(created_at: :desc)
.page(params[:page])
end

View file

@ -0,0 +1,13 @@
# frozen_string_literal: true
# Background job that refreshes the `processed` counter of a single import
# from the current number of points associated with it.
class Import::UpdatePointsCountJob < ApplicationJob
  queue_as :imports

  # @param import_id [Integer] id of the import whose counter should be refreshed
  # @return [Boolean, nil] result of `update`, or nil when the import is gone
  def perform(import_id)
    record = Import.find(import_id)
    points_total = record.points.count
    record.update(processed: points_total)
  rescue ActiveRecord::RecordNotFound
    # The import no longer exists, so there is nothing to update.
    nil
  end
end

View file

@ -5,6 +5,8 @@ class Import::WatcherJob < ApplicationJob
sidekiq_options retry: false
# Runs the import watcher, but only when DawarichSettings reports a
# self-hosted instance; otherwise the job is a no-op.
def perform
  Imports::Watcher.new.call if DawarichSettings.self_hosted?
end
end

View file

@ -11,9 +11,21 @@ class Export < ApplicationRecord
has_one_attached :file
after_commit -> { ExportJob.perform_later(id) }, on: :create
after_commit -> { file.purge }, on: :destroy
after_commit -> { remove_attached_file }, on: :destroy
def process!
Exports::Create.new(export: self).call
end
private
# Deletes the attached export file.
# With the :local Active Storage service the purge is deferred
# (`purge_later`); with any other service it is performed inline (`purge`).
def remove_attached_file
  active_storage = Rails.application.config.active_storage
  return file.purge_later if active_storage.service == :local

  file.purge
end
end

View file

@ -4,12 +4,10 @@ class Import < ApplicationRecord
belongs_to :user
has_many :points, dependent: :destroy
delegate :count, to: :points, prefix: true
has_one_attached :file
after_commit -> { Import::ProcessJob.perform_later(id) }, on: :create
after_commit -> { file.purge }, on: :destroy
after_commit :remove_attached_file, on: :destroy
enum :source, {
google_semantic_history: 0, owntracks: 1, google_records: 2,
@ -38,4 +36,10 @@ class Import < ApplicationRecord
file.attach(io: raw_file, filename: name, content_type: 'application/json')
end
private
# Schedules deletion of the attached import file via `purge_later`.
def remove_attached_file = file.purge_later
end

View file

@ -30,6 +30,7 @@ class Point < ApplicationRecord
after_create :async_reverse_geocode
after_create_commit :broadcast_coordinates
after_commit -> { Import::UpdatePointsCountJob.perform_later(import_id) }, on: :destroy, if: -> { import_id.present? }
def self.without_raw_data
select(column_names - ['raw_data'])

View file

@ -1,5 +1,8 @@
# frozen_string_literal: true
# This class is used to import Google's Records.json file
# via the CLI, vs the UI, which uses the `GoogleMaps::RecordsStorageImporter` class.
class GoogleMaps::RecordsImporter
include Imports::Broadcaster

View file

@ -0,0 +1,82 @@
# frozen_string_literal: true
# This class is used to import Google's Records.json file
# via the UI, vs the CLI, which uses the `GoogleMaps::RecordsImporter` class.
class GoogleMaps::RecordsStorageImporter
  # Maximum number of locations handed to GoogleMaps::RecordsImporter per call.
  BATCH_SIZE = 1000
  # Upper bound, in seconds, for a single download attempt (5 minutes).
  DOWNLOAD_TIMEOUT = 300
  # How many times a timed-out attempt is retried before giving up.
  MAX_DOWNLOAD_RETRIES = 3

  # @param import [Import] import record with the Records.json file attached as `file`
  # @param user_id [Integer] id of the user; looked up with `find_by`, so `user` may be nil
  def initialize(import, user_id)
    @import = import
    @user = User.find_by(id: user_id)
  end

  # Downloads the attached file, verifies it, and imports its locations in batches.
  #
  # @raise [Oj::ParseError] re-raised after logging when the file is not valid JSON
  # @raise [Timeout::Error] when every download attempt timed out
  # @raise [RuntimeError] when the downloaded size or checksum does not match the blob
  def call
    process_file_in_batches
  rescue Oj::ParseError => e
    Rails.logger.error("JSON parsing error: #{e.message}")
    raise
  end

  private

  attr_reader :import, :user

  # Downloads, verifies, parses and imports the attached file.
  # Returns early (importing nothing) when the parsed JSON is not a Hash with a
  # truthy 'locations' entry.
  def process_file_in_batches
    # The counter lives outside the `begin` so that `retry` (which re-runs the
    # `begin` body) does not reset it.
    retries = 0

    begin
      file = Timeout.timeout(DOWNLOAD_TIMEOUT) { import.file.download }

      verify_file_size!(file)
      verify_file_checksum!(file)

      parsed_file = Oj.load(file, mode: :compat)
      return unless parsed_file.is_a?(Hash) && parsed_file['locations']

      import_locations(parsed_file['locations'])
    rescue Timeout::Error
      # NOTE(review): this rescue covers the whole `begin` body, so a
      # Timeout::Error raised by any later step also restarts from the download.
      retries += 1
      if retries <= MAX_DOWNLOAD_RETRIES
        Rails.logger.warn("Download timeout, attempt #{retries} of #{MAX_DOWNLOAD_RETRIES}")
        retry
      else
        Rails.logger.error("Download failed after #{MAX_DOWNLOAD_RETRIES} attempts")
        raise
      end
    rescue StandardError => e
      Rails.logger.error("Download error: #{e.message}")
      raise
    end
  end

  # Raises unless the downloaded payload has exactly the byte size recorded on the blob.
  def verify_file_size!(file)
    expected_size = import.file.blob.byte_size
    actual_size = file.size
    return if expected_size == actual_size

    raise "Incomplete download: expected #{expected_size} bytes, got #{actual_size} bytes"
  end

  # Raises unless the Base64-encoded MD5 digest of the payload equals the blob checksum.
  def verify_file_checksum!(file)
    expected_checksum = import.file.blob.checksum
    actual_checksum = Base64.strict_encode64(Digest::MD5.digest(file))
    return if expected_checksum == actual_checksum

    raise "Checksum mismatch: expected #{expected_checksum}, got #{actual_checksum}"
  end

  # Hands the locations to GoogleMaps::RecordsImporter in slices of at most
  # BATCH_SIZE. `each_slice` also yields the final, smaller slice, so locations
  # left over after the last full batch are imported instead of being dropped.
  #
  # The second constructor argument is the running number of locations handed
  # over so far (1000, 2000, ... and finally the total).
  # NOTE(review): presumably used by RecordsImporter for progress reporting - confirm.
  def import_locations(locations)
    handed_over = 0

    locations.each_slice(BATCH_SIZE) do |batch|
      handed_over += batch.size
      GoogleMaps::RecordsImporter.new(import, handed_over).call(batch)
    end
  end
end

View file

@ -15,6 +15,7 @@ class Imports::Create
schedule_stats_creating(user.id)
schedule_visit_suggesting(user.id, import)
update_import_points_count(import)
rescue StandardError => e
create_import_failed_notification(import, user, e)
end
@ -26,6 +27,7 @@ class Imports::Create
case source
when 'google_semantic_history' then GoogleMaps::SemanticHistoryParser
when 'google_phone_takeout' then GoogleMaps::PhoneTakeoutParser
when 'google_records' then GoogleMaps::RecordsStorageImporter
when 'owntracks' then OwnTracks::Importer
when 'gpx' then Gpx::TrackImporter
when 'geojson' then Geojson::ImportParser
@ -33,6 +35,10 @@ class Imports::Create
end
end
# Enqueues Import::UpdatePointsCountJob for the given import.
def update_import_points_count(import) = Import::UpdatePointsCountJob.perform_later(import.id)
def schedule_stats_creating(user_id)
import.years_and_months_tracked.each do |year, month|
Stats::CalculatingJob.perform_later(user_id, year, month)

View file

@ -11,6 +11,6 @@ class Imports::Destroy
def call
@import.destroy!
BulkStatsCalculatingJob.perform_later(@user.id)
Stats::BulkCalculator.new(@user.id).call
end
end

View file

@ -19,6 +19,7 @@ class ReverseGeocoding::Places::FetchData
first_place = reverse_geocoded_places.shift
update_place(first_place)
reverse_geocoded_places.each { |reverse_geocoded_place| fetch_and_create_place(reverse_geocoded_place) }
end
@ -49,6 +50,9 @@ class ReverseGeocoding::Places::FetchData
new_place.country = data['properties']['country']
new_place.geodata = data
new_place.source = :photon
if new_place.lonlat.blank?
new_place.lonlat = "POINT(#{data['geometry']['coordinates'][0]} #{data['geometry']['coordinates'][1]})"
end
new_place.save!
end
@ -88,7 +92,7 @@ class ReverseGeocoding::Places::FetchData
limit: 10,
distance_sort: true,
radius: 1,
units: ::DISTANCE_UNIT,
units: ::DISTANCE_UNIT
)
data.reject do |place|

View file

@ -13,6 +13,24 @@
<p class="text-sm mt-2">JSON files from your Takeout/Location History/Semantic Location History/YEAR</p>
</div>
</div>
<div class="card bordered shadow-lg p-3 hover:shadow-blue-500/50">
<div class="form-control">
<label class="label cursor-pointer space-x-3">
<%= form.radio_button :source, :google_records, class: "radio radio-primary" %>
<span class="label-text">Google Records</span>
</label>
<p class="text-sm mt-2">The Records.json file from your Google Takeout</p>
</div>
</div>
<div class="card bordered shadow-lg p-3 hover:shadow-blue-500/50">
<div class="form-control">
<label class="label cursor-pointer space-x-3">
<%= form.radio_button :source, :google_phone_takeout, class: "radio radio-primary" %>
<span class="label-text">Google Phone Takeout</span>
</label>
<p class="text-sm mt-2">A JSON file you received after your request for Takeout from your mobile device</p>
</div>
</div>
<div class="card bordered shadow-lg p-3 hover:shadow-blue-500/50">
<div class="form-control">
<label class="label cursor-pointer space-x-3">
@ -31,15 +49,6 @@
<p class="text-sm mt-2">A valid GeoJSON file. For example, a file, exported from a Dawarich instance</p>
</div>
</div>
<div class="card bordered shadow-lg p-3 hover:shadow-blue-500/50">
<div class="form-control">
<label class="label cursor-pointer space-x-3">
<%= form.radio_button :source, :google_phone_takeout, class: "radio radio-primary" %>
<span class="label-text">Google Phone Takeout</span>
</label>
<p class="text-sm mt-2">A JSON file you received after your request for Takeout from your mobile device</p>
</div>
</div>
<div class="card bordered shadow-lg p-3 hover:shadow-blue-500/50">
<div class="form-control">
<label class="label cursor-pointer space-x-3">

View file

@ -53,7 +53,7 @@
<% @imports.each do |import| %>
<tr data-import-id="<%= import.id %>"
id="import-<%= import.id %>"
data-points-total="<%= import.points_count %>">
data-points-total="<%= import.processed %>">
<td>
<%= link_to import.name, import, class: 'underline hover:no-underline' %>
(<%= import.source %>)
@ -63,7 +63,7 @@
<%= link_to '📋', points_path(import_id: import.id) %>
</td>
<td data-points-count>
<%= number_with_delimiter import.points_count %>
<%= number_with_delimiter import.processed %>
</td>
<td data-reverse-geocoded-points-count>
<%= number_with_delimiter import.reverse_geocoded_points_count %>

View file

@ -3,36 +3,6 @@
<div class="mx-auto md:w-2/3 w-full">
<h1 class="font-bold text-4xl">New import</h1>
<div role="alert" class="alert alert-info my-5">
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" class="stroke-current shrink-0 w-6 h-6"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path></svg>
<span>
<p>To import <code>Records.json</code> file from your Google Takeout Archive, use rake task.</p>
<p class='mb-3'>Import takes a while to finish, so you might want to run it in <code>screen</code> session.</p>
<p class='mt-5 mb-2'>1. Upload your Records.json file to your server</p>
<p class='mt-5 mb-2'>2. Copy you Records.json to the <code>tmp</code> folder:
<div class="mockup-code">
<pre data-prefix="$"><code>docker cp Records.json dawarich_app:/var/app/tmp/imports/Records.json</code></pre>
</div>
</p>
<p class='mt-5 mb-2'>3. Attach to the docker container:
<div class="mockup-code">
<pre data-prefix="$"><code>docker exec -it dawarich_app sh</code></pre>
</div>
</p>
<p class='mt-5 mb-2'>4. Run the rake task:
<div class="mockup-code">
<pre data-prefix="$"><code>bundle exec rake import:big_file['tmp/imports/Records.json','user@example.com']</code>
</pre>
</div>
</p>
<p class='mt-5 mb-2'>5. Wait patiently for process to finish</p>
<p class='mt-3'>You can monitor progress in <a href="/sidekiq" class="underline">Sidekiq UI</a></p>
</span>
</div>
<%= render "form", import: @import %>
<%= link_to "Back to imports", imports_path, class: "btn mx-5 mb-5" %>

View file

@ -15,6 +15,7 @@
<%= stylesheet_link_tag "application", "data-turbo-track": "reload" %>
<%= javascript_importmap_tags %>
<%= render 'application/favicon' %>
<%= Sentry.get_trace_propagation_meta.html_safe if Sentry.initialized? %>
</head>
<body class='min-h-screen'>

View file

@ -21,3 +21,5 @@ NOMINATIM_API_USE_HTTPS = ENV.fetch('NOMINATIM_API_USE_HTTPS', 'true') == 'true'
GEOAPIFY_API_KEY = ENV.fetch('GEOAPIFY_API_KEY', nil)
# /Reverse geocoding settings
SENTRY_DSN = ENV.fetch('SENTRY_DSN', nil)

View file

@ -2,10 +2,15 @@
require 'aws-sdk-core'
Aws.config.update(
{
region: ENV['AWS_REGION'],
endpoint: ENV['AWS_ENDPOINT'],
credentials: Aws::Credentials.new(ENV['AWS_ACCESS_KEY_ID'], ENV['AWS_SECRET_ACCESS_KEY'])
}
)
if ENV['AWS_ACCESS_KEY_ID'] &&
ENV['AWS_SECRET_ACCESS_KEY'] &&
ENV['AWS_REGION'] &&
ENV['AWS_ENDPOINT']
Aws.config.update(
{
region: ENV['AWS_REGION'],
endpoint: ENV['AWS_ENDPOINT'],
credentials: Aws::Credentials.new(ENV['AWS_ACCESS_KEY_ID'], ENV['AWS_SECRET_ACCESS_KEY'])
}
)
end

View file

@ -0,0 +1,9 @@
# frozen_string_literal: true
# Sentry is initialised only when SENTRY_DSN is set; otherwise nothing happens.
if SENTRY_DSN
  Sentry.init do |sentry|
    sentry.dsn = SENTRY_DSN
    sentry.breadcrumbs_logger = [:active_support_logger]
    # 1.0 means every transaction is sampled for tracing.
    sentry.traces_sample_rate = 1.0
  end
end

View file

@ -25,11 +25,6 @@ app_version_checking_job:
class: "AppVersionCheckingJob"
queue: default
telemetry_sending_job:
cron: "0 */1 * * *" # every 1 hour
class: "TelemetrySendingJob"
queue: default
cache_preheating_job:
cron: "0 0 * * *" # every day at 0:00
class: "Cache::PreheatingJob"

View file

@ -7,7 +7,7 @@ local:
root: <%= Rails.root.join("storage") %>
# Only load S3 config if not in test environment
<% unless Rails.env.test? %>
<% if !Rails.env.test? && ENV['AWS_ACCESS_KEY_ID'] && ENV['AWS_SECRET_ACCESS_KEY'] && ENV['AWS_REGION'] && ENV['AWS_BUCKET'] %>
s3:
service: S3
access_key_id: <%= ENV.fetch("AWS_ACCESS_KEY_ID") %>

View file

@ -0,0 +1,13 @@
# frozen_string_literal: true
# Data migration: enqueues one Import::UpdatePointsCountJob per existing import.
class UpdateImportsPointsCount < ActiveRecord::Migration[8.0]
  def up
    # Only the ids are needed to enqueue the jobs. Fetching them directly avoids
    # instantiating full Import rows, whose `raw_data` column can hold entire
    # import files for imports that were not migrated to attached storage.
    Import.order(:id).ids.each do |import_id|
      Import::UpdatePointsCountJob.perform_later(import_id)
    end
  end

  # Enqueued jobs cannot be undone.
  def down
    raise ActiveRecord::IrreversibleMigration
  end
end

View file

@ -1 +1 @@
DataMigrate::Data.define(version: 20250303194123)
DataMigrate::Data.define(version: 20250403204658)

View file

@ -43,6 +43,7 @@ services:
volumes:
- dawarich_public:/var/app/public
- dawarich_watched:/var/app/tmp/imports/watched
- dawarich_storage:/var/app/storage
networks:
- dawarich
ports:
@ -98,6 +99,7 @@ services:
volumes:
- dawarich_public:/var/app/public
- dawarich_watched:/var/app/tmp/imports/watched
- dawarich_storage:/var/app/storage
networks:
- dawarich
stdin_open: true
@ -152,3 +154,4 @@ volumes:
dawarich_shared:
dawarich_public:
dawarich_watched:
dawarich_storage:

View file

@ -4,10 +4,28 @@ require 'rails_helper'
RSpec.describe Import::WatcherJob, type: :job do
describe '#perform' do
it 'calls Imports::Watcher' do
expect_any_instance_of(Imports::Watcher).to receive(:call)
context 'when Dawarich is not self-hosted' do
before do
allow(DawarichSettings).to receive(:self_hosted?).and_return(false)
end
described_class.perform_now
it 'does not call Imports::Watcher' do
expect_any_instance_of(Imports::Watcher).not_to receive(:call)
described_class.perform_now
end
end
context 'when Dawarich is self-hosted' do
before do
allow(DawarichSettings).to receive(:self_hosted?).and_return(true)
end
it 'calls Imports::Watcher' do
expect_any_instance_of(Imports::Watcher).to receive(:call)
described_class.perform_now
end
end
end
end

View file

@ -79,4 +79,14 @@ RSpec.describe Point, type: :model do
end
end
end
describe 'callbacks' do
describe '#update_import_points_count' do
let(:point) { create(:point, import_id: 1) }
it 'updates the import points count' do
expect { point.destroy }.to have_enqueued_job(Import::UpdatePointsCountJob).with(1)
end
end
end
end

View file

@ -0,0 +1,147 @@
# frozen_string_literal: true

require 'rails_helper'

RSpec.describe GoogleMaps::RecordsStorageImporter do
  subject { described_class.new(import, user.id) }

  let(:user) { create(:user) }
  let(:import) { create(:import, source: 'google_records') }
  let(:file_path) { Rails.root.join('spec/fixtures/files/google/records.json') }
  let(:file_content) { File.read(file_path) }

  before do
    import.file.attach(
      io: StringIO.new(file_content),
      filename: 'records.json',
      content_type: 'application/json'
    )
  end

  describe '#call' do
    context 'with valid file' do
      # Stub the downstream importer so no points are actually created.
      let(:mock_importer) { instance_double(GoogleMaps::RecordsImporter) }

      before do
        allow(GoogleMaps::RecordsImporter).to receive(:new).and_return(mock_importer)
        allow(mock_importer).to receive(:call)
      end

      it 'never produces a full-size batch for a small file' do
        subject.call

        # The fixture holds fewer than BATCH_SIZE locations, so no batch may be
        # reported with a running index of BATCH_SIZE.
        expect(GoogleMaps::RecordsImporter)
          .not_to have_received(:new).with(import, described_class::BATCH_SIZE)
      end

      context 'when file has more locations than batch size' do
        let(:large_batch) do
          locations = Array.new(1001) do
            {
              latitudeE7: 533_690_550,
              longitudeE7: 836_950_010,
              accuracy: 150,
              source: 'UNKNOWN',
              timestamp: '2012-12-15T14:21:29.460Z'
            }
          end

          { locations: locations }.to_json
        end

        before do
          import.file.attach(
            io: StringIO.new(large_batch),
            filename: 'records.json',
            content_type: 'application/json'
          )
        end

        it 'processes in batches of 1000' do
          subject.call

          # The first 1000 locations are handed over as one full batch, reported
          # with a running index of 1000. Handling of the single leftover
          # location is deliberately not pinned by this example.
          expect(GoogleMaps::RecordsImporter).to have_received(:new).with(import, 1000)
          expect(mock_importer)
            .to have_received(:call).with(satisfy { |batch| batch.size == 1000 })
        end
      end
    end

    context 'with download issues' do
      it 'retries on timeout' do
        call_count = 0
        allow(import.file.blob).to receive(:download) do
          call_count += 1
          call_count < 3 ? raise(Timeout::Error) : file_content
        end

        expect(Rails.logger).to receive(:warn).twice

        subject.call

        expect(call_count).to eq(3)
      end

      it 'fails after max retries' do
        allow(import.file.blob).to receive(:download).and_raise(Timeout::Error)

        expect(Rails.logger).to receive(:warn).exactly(3).times
        expect(Rails.logger).to receive(:error).with('Download failed after 3 attempts')

        expect { subject.call }.to raise_error(Timeout::Error)
      end
    end

    context 'with file integrity issues' do
      it 'raises error when file size mismatches' do
        allow(import.file.blob).to receive(:byte_size).and_return(9999)

        expect(Rails.logger).to receive(:error)

        expect { subject.call }.to raise_error(/Incomplete download/)
      end

      it 'raises error when checksum mismatches' do
        allow(import.file.blob).to receive(:checksum).and_return('invalid_checksum')

        expect(Rails.logger).to receive(:error)

        expect { subject.call }.to raise_error(/Checksum mismatch/)
      end
    end

    context 'with invalid JSON' do
      before do
        import.file.attach(
          io: StringIO.new('invalid json'),
          filename: 'records.json',
          content_type: 'application/json'
        )
      end

      it 'logs and raises parse error' do
        # The parse failure is raised inside the importer's generic rescue, so
        # it is logged with the "Download error" prefix.
        expect(Rails.logger).to receive(:error).with(/Download error: Empty input/)

        expect { subject.call }.to raise_error(StandardError)
      end
    end

    context 'with invalid data structure' do
      before do
        import.file.attach(
          io: StringIO.new({ wrong_key: [] }.to_json),
          filename: 'records.json',
          content_type: 'application/json'
        )
      end

      it 'returns early when locations key is missing' do
        expect(GoogleMaps::RecordsImporter).not_to receive(:new)

        subject.call
      end
    end
  end
end

View file

@ -22,6 +22,10 @@ RSpec.describe Imports::Create do
receive(:new).with(import, user.id).and_return(double(call: true))
service.call
end
it 'updates the import points count' do
expect { service.call }.to have_enqueued_job(Import::UpdatePointsCountJob).with(import.id)
end
end
context 'when source is google_phone_takeout' do

View file

@ -13,7 +13,8 @@ RSpec.describe Imports::Destroy do
end
it 'enqueues a BulkStatsCalculatingJob' do
expect(BulkStatsCalculatingJob).to receive(:perform_later).with(user.id)
expect(Stats::BulkCalculator).to receive(:new).with(user.id).and_return(double(call: nil))
service.call
end
end