From d25468207db001e27d8f9ac8624eef796c6293e7 Mon Sep 17 00:00:00 2001 From: Eugene Burmakin Date: Thu, 23 May 2024 20:35:31 +0200 Subject: [PATCH] Add a rake task to import large files from Google Takeout --- CHANGELOG.md | 8 ++ app/jobs/import_google_takeout_job.rb | 13 +++ app/services/google_maps/records_parser.rb | 52 ++++------- .../google_maps/semantic_history_parser.rb | 2 +- app/services/stream_handler.rb | 26 ++++++ app/views/imports/_form.html.erb | 2 +- app/views/imports/new.html.erb | 5 ++ app/views/points/_form.html.erb | 17 ---- app/views/points/edit.html.erb | 8 -- app/views/points/new.html.erb | 7 -- app/views/points/show.html.erb | 15 ---- config/initializers/reddis.rb | 7 ++ lib/tasks/import.rake | 25 ++++++ spec/jobs/import_job_spec.rb | 2 + spec/requests/export_spec.rb | 1 + spec/services/own_tracks/params_spec.rb | 88 +++++++++---------- swagger/v1/swagger.yaml | 2 +- 17 files changed, 153 insertions(+), 127 deletions(-) create mode 100644 app/jobs/import_google_takeout_job.rb create mode 100644 app/services/stream_handler.rb delete mode 100644 app/views/points/_form.html.erb delete mode 100644 app/views/points/edit.html.erb delete mode 100644 app/views/points/new.html.erb delete mode 100644 app/views/points/show.html.erb create mode 100644 config/initializers/reddis.rb create mode 100644 lib/tasks/import.rake diff --git a/CHANGELOG.md b/CHANGELOG.md index 6cf71a75..1ea875e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - Sidekiq web interface to monitor background jobs is now available at `/sidekiq` - Now you can choose a date range of points to be exported +--- + +## [0.2.6] — 2024-05-23 + ### Fixed - Stop selecting `raw_data` column during requests to `imports` and `points` tables to improve performance. @@ -21,6 +25,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - Rename PointsController to MapController along with all the views and routes +### Added + +- Add Points page to display all the points as a table with pagination to allow users to delete points + --- ## [0.2.5] — 2024-05-21 diff --git a/app/jobs/import_google_takeout_job.rb b/app/jobs/import_google_takeout_job.rb new file mode 100644 index 00000000..b06d867d --- /dev/null +++ b/app/jobs/import_google_takeout_job.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +class ImportGoogleTakeoutJob < ApplicationJob + queue_as :imports + + def perform(import_id, json_string) + import = Import.find(import_id) + + json = Oj.load(json_string) + + GoogleMaps::RecordsParser.new(import).call(json) + end +end diff --git a/app/services/google_maps/records_parser.rb b/app/services/google_maps/records_parser.rb index fae1a6d6..900a708b 100644 --- a/app/services/google_maps/records_parser.rb +++ b/app/services/google_maps/records_parser.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require 'redis_client' class GoogleMaps::RecordsParser attr_reader :import @@ -7,43 +8,28 @@ class GoogleMaps::RecordsParser @import = import end - def call - points_data = parse_json + def call(json) + data = parse_json(json) - points = 0 - - points_data.each do |point_data| - next if Point.exists?(timestamp: point_data[:timestamp]) - - Point.create( - latitude: point_data[:latitude], - longitude: point_data[:longitude], - timestamp: point_data[:timestamp], - raw_data: point_data[:raw_data], - topic: 'Google Maps Timeline Export', - tracker_id: 'google-maps-timeline-export', - import_id: import.id - ) - - points += 1 - end - - doubles = points_data.size - points - processed = points + doubles - - { raw_points: points_data.size, points:, doubles:, processed: } + Point.create( + latitude: data[:latitude], + longitude: data[:longitude], + timestamp: data[:timestamp], + raw_data: data[:raw_data], + topic: 'Google Maps Timeline Export', + tracker_id: 'google-maps-timeline-export', + import_id: import.id + ) end private - def parse_json - import.raw_data['locations'].map do |record| - { - latitude: record['latitudeE7'].to_f / 10**7, - longitude: record['longitudeE7'].to_f / 10**7, - timestamp: DateTime.parse(record['timestamp']).to_i, - raw_data: record - } - end.reject(&:blank?) + def parse_json(json) + { + latitude: json['latitudeE7'].to_f / 10**7, + longitude: json['longitudeE7'].to_f / 10**7, + timestamp: DateTime.parse(json['timestamp']).to_i, + raw_data: json + } end end diff --git a/app/services/google_maps/semantic_history_parser.rb b/app/services/google_maps/semantic_history_parser.rb index af35fd1f..eb4ecb58 100644 --- a/app/services/google_maps/semantic_history_parser.rb +++ b/app/services/google_maps/semantic_history_parser.rb @@ -31,7 +31,7 @@ class GoogleMaps::SemanticHistoryParser doubles = points_data.size - points processed = points + doubles - { raw_points: points_data.size, points: points, doubles: doubles, processed: processed } + { raw_points: points_data.size, points:, doubles:, processed: } end private diff --git a/app/services/stream_handler.rb b/app/services/stream_handler.rb new file mode 100644 index 00000000..9136e521 --- /dev/null +++ b/app/services/stream_handler.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +require 'oj' + +class StreamHandler < Oj::ScHandler + attr_reader :import_id + + def initialize(import_id) + @import_id = import_id + @buffer = {} + end + + def hash_start + {} + end + + def hash_end + ImportGoogleTakeoutJob.perform_later(import_id, @buffer.to_json) + + @buffer = {} + end + + def hash_set(_buffer, key, value) + @buffer[key] = value + end +end diff --git a/app/views/imports/_form.html.erb b/app/views/imports/_form.html.erb index 71e3931c..5ae8f89c 100644 --- a/app/views/imports/_form.html.erb +++ b/app/views/imports/_form.html.erb @@ -15,7 +15,7 @@
Select source
- <%= form.collection_radio_buttons :source, Import.sources, :first, :first %> + <%= form.collection_radio_buttons :source, Import.sources.except('google_records'), :first, :first %>