Add a rake task to import large files from Google Takeout

This commit is contained in:
Eugene Burmakin 2024-05-23 20:35:31 +02:00
parent 814095a4a2
commit d25468207d
17 changed files with 153 additions and 127 deletions

View file

@ -13,6 +13,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
- Sidekiq web interface to monitor background jobs is now available at `/sidekiq` - Sidekiq web interface to monitor background jobs is now available at `/sidekiq`
- Now you can choose a date range of points to be exported - Now you can choose a date range of points to be exported
---
## [0.2.6] — 2024-05-23
### Fixed ### Fixed
- Stop selecting `raw_data` column during requests to `imports` and `points` tables to improve performance. - Stop selecting `raw_data` column during requests to `imports` and `points` tables to improve performance.
@ -21,6 +25,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
- Rename PointsController to MapController along with all the views and routes - Rename PointsController to MapController along with all the views and routes
### Added
- Add Points page to display all the points as a table with pagination to allow users to delete points
--- ---
## [0.2.5] — 2024-05-21 ## [0.2.5] — 2024-05-21

View file

@ -0,0 +1,13 @@
# frozen_string_literal: true
# Background job that imports a single Google Takeout location record.
#
# Runs on the `imports` queue. The record arrives as a JSON string, is decoded
# with Oj and handed to GoogleMaps::RecordsParser together with the import it
# belongs to.
class ImportGoogleTakeoutJob < ApplicationJob
  queue_as :imports

  # @param import_id [Object] id looked up with Import.find
  # @param json_string [String] JSON text of one record, decoded with Oj.load
  def perform(import_id, json_string)
    parser = GoogleMaps::RecordsParser.new(Import.find(import_id))
    parser.call(Oj.load(json_string))
  end
end

View file

@ -1,5 +1,6 @@
# frozen_string_literal: true # frozen_string_literal: true
require 'redis_client'
class GoogleMaps::RecordsParser class GoogleMaps::RecordsParser
attr_reader :import attr_reader :import
@ -7,43 +8,28 @@ class GoogleMaps::RecordsParser
@import = import @import = import
end end
def call def call(json)
points_data = parse_json data = parse_json(json)
points = 0
points_data.each do |point_data|
next if Point.exists?(timestamp: point_data[:timestamp])
Point.create( Point.create(
latitude: point_data[:latitude], latitude: data[:latitude],
longitude: point_data[:longitude], longitude: data[:longitude],
timestamp: point_data[:timestamp], timestamp: data[:timestamp],
raw_data: point_data[:raw_data], raw_data: data[:raw_data],
topic: 'Google Maps Timeline Export', topic: 'Google Maps Timeline Export',
tracker_id: 'google-maps-timeline-export', tracker_id: 'google-maps-timeline-export',
import_id: import.id import_id: import.id
) )
points += 1
end
doubles = points_data.size - points
processed = points + doubles
{ raw_points: points_data.size, points:, doubles:, processed: }
end end
private private
def parse_json def parse_json(json)
import.raw_data['locations'].map do |record|
{ {
latitude: record['latitudeE7'].to_f / 10**7, latitude: json['latitudeE7'].to_f / 10**7,
longitude: record['longitudeE7'].to_f / 10**7, longitude: json['longitudeE7'].to_f / 10**7,
timestamp: DateTime.parse(record['timestamp']).to_i, timestamp: DateTime.parse(json['timestamp']).to_i,
raw_data: record raw_data: json
} }
end.reject(&:blank?)
end end
end end

View file

@ -31,7 +31,7 @@ class GoogleMaps::SemanticHistoryParser
doubles = points_data.size - points doubles = points_data.size - points
processed = points + doubles processed = points + doubles
{ raw_points: points_data.size, points: points, doubles: doubles, processed: processed } { raw_points: points_data.size, points:, doubles:, processed: }
end end
private private

View file

@ -0,0 +1,26 @@
# frozen_string_literal: true
require 'oj'
# Oj "simple callback parser" handler that walks a Google Takeout
# `Records.json` document and enqueues one ImportGoogleTakeoutJob per location
# record, without keeping the whole list of records in memory.
#
# Expected document shape: `{ "locations": [ { ...record... }, ... ] }`.
#
# Each open JSON object gets its own Hash, tracked on a stack. A job is only
# enqueued when a record-level object closes, so:
# * objects nested inside a record stay part of that record instead of
#   flushing a half-built record, and
# * closing the root object does not enqueue a job with no location data.
class StreamHandler < Oj::ScHandler
  # Number of JSON objects open while a location record is being read:
  # the root object plus the record itself.
  RECORD_DEPTH = 2

  attr_reader :import_id

  # @param import_id [Object] id of the import the enqueued jobs belong to
  def initialize(import_id)
    @import_id = import_id
    @open_hashes = []
  end

  # Called by Oj when a JSON object starts.
  #
  # @return [Hash] container that Oj passes back to #hash_set for this object
  def hash_start
    {}.tap { |hash| @open_hashes.push(hash) }
  end

  # Called by Oj when a JSON object ends. Enqueues the finished object only if
  # it is a location record, i.e. a direct child of the root `locations` list.
  def hash_end
    finished = @open_hashes.pop
    return unless @open_hashes.size == RECORD_DEPTH - 1

    ImportGoogleTakeoutJob.perform_later(import_id, finished.to_json)
  end

  # Called by Oj for every key/value pair of an object.
  #
  # @param hash [Hash] the container returned by #hash_start for that object
  # @param key [String] member name
  # @param value [Object] parsed member value
  def hash_set(hash, key, value)
    hash[key] = value
  end

  # Called by Oj when a JSON array starts.
  #
  # @return [Array] container that Oj passes back to #array_append
  def array_start
    []
  end

  # Called by Oj for every element of an array. Elements of the root-level
  # `locations` array are intentionally dropped: each record has already been
  # enqueued in #hash_end, and keeping them would defeat streaming.
  #
  # @param array [Array] the container returned by #array_start
  # @param value [Object] parsed element
  def array_append(array, value)
    array << value if @open_hashes.size >= RECORD_DEPTH
  end
end

View file

@ -15,7 +15,7 @@
<div class="label"> <div class="label">
<span class="label-text">Select source</span> <span class="label-text">Select source</span>
</div> </div>
<%= form.collection_radio_buttons :source, Import.sources, :first, :first %> <%= form.collection_radio_buttons :source, Import.sources.except('google_records'), :first, :first %>
</label> </label>
<label class="form-control w-full max-w-xs my-5"> <label class="form-control w-full max-w-xs my-5">

View file

@ -1,6 +1,11 @@
<div class="mx-auto md:w-2/3 w-full"> <div class="mx-auto md:w-2/3 w-full">
<h1 class="font-bold text-4xl">New import</h1> <h1 class="font-bold text-4xl">New import</h1>
<div role="alert" class="alert alert-info my-5">
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" class="stroke-current shrink-0 w-6 h-6"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path></svg>
<span>To import <code>Records.json</code> file from your Google Takeout Archive, use rake task</span>
</div>
<%= render "form", import: @import %> <%= render "form", import: @import %>
<%= link_to "Back to imports", imports_path, class: "ml-2 rounded-lg py-3 px-5 bg-gray-100 inline-block font-medium" %> <%= link_to "Back to imports", imports_path, class: "ml-2 rounded-lg py-3 px-5 bg-gray-100 inline-block font-medium" %>

View file

@ -1,17 +0,0 @@
<%= form_with(model: point, class: "contents") do |form| %>
<% if point.errors.any? %>
<div id="error_explanation" class="bg-red-50 text-red-500 px-3 py-2 font-medium rounded-lg mt-3">
<h2><%= pluralize(point.errors.count, "error") %> prohibited this point from being saved:</h2>
<ul>
<% point.errors.each do |error| %>
<li><%= error.full_message %></li>
<% end %>
</ul>
</div>
<% end %>
<div class="inline">
<%= form.submit class: "rounded-lg py-3 px-5 bg-blue-600 text-white inline-block font-medium cursor-pointer" %>
</div>
<% end %>

View file

@ -1,8 +0,0 @@
<div class="mx-auto md:w-2/3 w-full">
<h1 class="font-bold text-4xl">Editing point</h1>
<%= render "form", point: @point %>
<%= link_to "Show this point", @point, class: "ml-2 rounded-lg py-3 px-5 bg-gray-100 inline-block font-medium" %>
<%= link_to "Back to points", points_path, class: "ml-2 rounded-lg py-3 px-5 bg-gray-100 inline-block font-medium" %>
</div>

View file

@ -1,7 +0,0 @@
<div class="mx-auto md:w-2/3 w-full">
<h1 class="font-bold text-4xl">New point</h1>
<%= render "form", point: @point %>
<%= link_to "Back to points", points_path, class: "ml-2 rounded-lg py-3 px-5 bg-gray-100 inline-block font-medium" %>
</div>

View file

@ -1,15 +0,0 @@
<div class="mx-auto md:w-2/3 w-full flex">
<div class="mx-auto">
<% if notice.present? %>
<p class="py-2 px-3 bg-green-50 mb-5 text-green-500 font-medium rounded-lg inline-block" id="notice"><%= notice %></p>
<% end %>
<%= render @point %>
<%= link_to "Edit this point", edit_point_path(@point), class: "mt-2 rounded-lg py-3 px-5 bg-gray-100 inline-block font-medium" %>
<%= link_to "Back to points", points_path, class: "ml-2 rounded-lg py-3 px-5 bg-gray-100 inline-block font-medium" %>
<div class="inline-block ml-2">
<%= button_to "Destroy this point", @point, method: :delete, class: "mt-2 rounded-lg py-3 px-5 bg-gray-100 font-medium" %>
</div>
</div>
</div>

View file

@ -0,0 +1,7 @@
# frozen_string_literal: true
# Process-wide access point for a shared Redis connection.
module Reddis
  class << self
    # Builds the Redis client on first use from the REDIS_URL environment
    # variable and returns the same memoized instance on later calls.
    def client
      @client ||= Redis.new(url: ENV['REDIS_URL'])
    end
  end
end

25
lib/tasks/import.rake Normal file
View file

@ -0,0 +1,25 @@
# frozen_string_literal: true
# Usage: rake import:big_file['/path/to/file.json','user@email.com']
namespace :import do
  # Streams a (potentially very large) Google Takeout JSON file through
  # StreamHandler, which enqueues background jobs for the records it reads.
  #
  # Arguments:
  #   file_path  - path to the JSON file on the machine running the task
  #   user_email - email of the user who will own the import
  #
  # Raises if the user or the file cannot be found, or if the import record
  # cannot be saved.
  desc 'Accepts a file path and user email and imports the data into the database'
  task :big_file, %i[file_path user_email] => :environment do |_, args|
    file_path = args[:file_path]

    user = User.find_by(email: args[:user_email])
    raise 'User not found' unless user

    # Check the file before touching the database so a wrong path does not
    # leave an empty import record behind.
    raise "File not found: #{file_path}" unless file_path && File.file?(file_path)

    # create! raises on failure instead of handing an unsaved record (nil id)
    # to the handler, which would make every enqueued job fail.
    import = user.imports.create!(name: file_path, source: :google_records)
    handler = StreamHandler.new(import.id)

    puts "Importing #{file_path} for #{user.email}, file size is #{File.size(file_path)}... This might take a while, have patience!"

    File.open(file_path, 'r') { |content| Oj.sc_parse(handler, content) }

    puts "Imported #{file_path} for #{user.email} successfully!"
  end
end

View file

@ -1,3 +1,5 @@
# frozen_string_literal: true
require 'rails_helper' require 'rails_helper'
RSpec.describe ImportJob, type: :job do RSpec.describe ImportJob, type: :job do

View file

@ -13,6 +13,7 @@ RSpec.describe 'Exports', type: :request do
it 'returns http success' do it 'returns http success' do
get '/export' get '/export'
expect(response).to have_http_status(:success) expect(response).to have_http_status(:success)
end end
end end

View file

@ -172,7 +172,7 @@ paths:
lat: 52.502397 lat: 52.502397
lon: 13.356718 lon: 13.356718
tid: Swagger tid: Swagger
tst: 1716487940 tst: 1716488929
servers: servers:
- url: http://{defaultHost} - url: http://{defaultHost}
variables: variables: