Add a rake task to import large files from Google Takeout

This commit is contained in:
Eugene Burmakin 2024-05-23 20:35:31 +02:00
parent 814095a4a2
commit d25468207d
17 changed files with 153 additions and 127 deletions

View file

@ -13,6 +13,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
- Sidekiq web interface to monitor background jobs is now available at `/sidekiq`
- Now you can choose a date range of points to be exported
---
## [0.2.6] — 2024-05-23
### Fixed
- Stop selecting `raw_data` column during requests to `imports` and `points` tables to improve performance.
@ -21,6 +25,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
- Rename PointsController to MapController along with all the views and routes
### Added
- Add Points page to display all the points as a table with pagination to allow users to delete points
---
## [0.2.5] — 2024-05-21

View file

@ -0,0 +1,13 @@
# frozen_string_literal: true
# Background job that stores a single Google Takeout location record.
# Jobs are placed on the :imports queue.
class ImportGoogleTakeoutJob < ApplicationJob
  queue_as :imports

  # Looks up the import, decodes the JSON payload and hands it to
  # GoogleMaps::RecordsParser.
  #
  # @param import_id [Integer] id used for Import.find
  # @param json_string [String] JSON text decoded with Oj.load
  def perform(import_id, json_string)
    Import.find(import_id).then do |import|
      payload = Oj.load(json_string)
      GoogleMaps::RecordsParser.new(import).call(payload)
    end
  end
end

View file

@ -1,5 +1,6 @@
# frozen_string_literal: true
require 'redis_client'
class GoogleMaps::RecordsParser
attr_reader :import
@ -7,43 +8,28 @@ class GoogleMaps::RecordsParser
@import = import
end
def call
points_data = parse_json
def call(json)
data = parse_json(json)
points = 0
points_data.each do |point_data|
next if Point.exists?(timestamp: point_data[:timestamp])
Point.create(
latitude: point_data[:latitude],
longitude: point_data[:longitude],
timestamp: point_data[:timestamp],
raw_data: point_data[:raw_data],
topic: 'Google Maps Timeline Export',
tracker_id: 'google-maps-timeline-export',
import_id: import.id
)
points += 1
end
doubles = points_data.size - points
processed = points + doubles
{ raw_points: points_data.size, points:, doubles:, processed: }
Point.create(
latitude: data[:latitude],
longitude: data[:longitude],
timestamp: data[:timestamp],
raw_data: data[:raw_data],
topic: 'Google Maps Timeline Export',
tracker_id: 'google-maps-timeline-export',
import_id: import.id
)
end
private
def parse_json
import.raw_data['locations'].map do |record|
{
latitude: record['latitudeE7'].to_f / 10**7,
longitude: record['longitudeE7'].to_f / 10**7,
timestamp: DateTime.parse(record['timestamp']).to_i,
raw_data: record
}
end.reject(&:blank?)
def parse_json(json)
{
latitude: json['latitudeE7'].to_f / 10**7,
longitude: json['longitudeE7'].to_f / 10**7,
timestamp: DateTime.parse(json['timestamp']).to_i,
raw_data: json
}
end
end

View file

@ -31,7 +31,7 @@ class GoogleMaps::SemanticHistoryParser
doubles = points_data.size - points
processed = points + doubles
{ raw_points: points_data.size, points: points, doubles: doubles, processed: processed }
{ raw_points: points_data.size, points:, doubles:, processed: }
end
private

View file

@ -0,0 +1,26 @@
# frozen_string_literal: true
require 'oj'
# Oj streaming (SCP) handler that enqueues one ImportGoogleTakeoutJob per
# location record, so the whole file never has to be held in memory.
#
# Expected document shape: {"locations": [ {record}, {record}, ... ]}.
#
# Each value is built into the container Oj passes back to the callbacks,
# rather than into one shared buffer. Nested objects and arrays inside a
# record therefore stay inside that record, and closing a nested object or
# the outer wrapper does not enqueue a partial or empty job.
class StreamHandler < Oj::ScHandler
  # Nesting depth while positioned inside the top-level "locations" array:
  # the wrapping object is depth 1, the array itself is depth 2.
  LOCATIONS_DEPTH = 2

  attr_reader :import_id

  # @param import_id [Integer] id forwarded to every enqueued job
  def initialize(import_id)
    @import_id = import_id
    @depth = 0
  end

  # Called by Oj when an object opens; the returned Hash is later passed to
  # #hash_set for each member.
  def hash_start
    @depth += 1
    {}
  end

  # Called by Oj when an object closes.
  def hash_end
    @depth -= 1
  end

  # Stores a member on the hash it belongs to.
  def hash_set(hash, key, value)
    hash[key] = value
  end

  # Called by Oj when an array opens; the returned Array is later passed to
  # #array_append for each element.
  def array_start
    @depth += 1
    []
  end

  # Called by Oj when an array closes.
  def array_end
    @depth -= 1
  end

  # A completed Hash arriving at LOCATIONS_DEPTH is one whole location
  # record: enqueue it and do NOT keep it, so memory use stays flat.
  # Any other element is appended to its array as usual.
  def array_append(array, value)
    if @depth == LOCATIONS_DEPTH && value.is_a?(Hash)
      ImportGoogleTakeoutJob.perform_later(import_id, value.to_json)
    else
      array << value
    end
  end
end

View file

@ -15,7 +15,7 @@
<div class="label">
<span class="label-text">Select source</span>
</div>
<%= form.collection_radio_buttons :source, Import.sources, :first, :first %>
<%= form.collection_radio_buttons :source, Import.sources.except('google_records'), :first, :first %>
</label>
<label class="form-control w-full max-w-xs my-5">

View file

@ -1,6 +1,11 @@
<div class="mx-auto md:w-2/3 w-full">
<h1 class="font-bold text-4xl">New import</h1>
<div role="alert" class="alert alert-info my-5">
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" class="stroke-current shrink-0 w-6 h-6"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path></svg>
<span>To import <code>Records.json</code> file from your Google Takeout Archive, use rake task</span>
</div>
<%= render "form", import: @import %>
<%= link_to "Back to imports", imports_path, class: "ml-2 rounded-lg py-3 px-5 bg-gray-100 inline-block font-medium" %>

View file

@ -1,17 +0,0 @@
<%= form_with(model: point, class: "contents") do |form| %>
<% if point.errors.any? %>
<div id="error_explanation" class="bg-red-50 text-red-500 px-3 py-2 font-medium rounded-lg mt-3">
<h2><%= pluralize(point.errors.count, "error") %> prohibited this point from being saved:</h2>
<ul>
<% point.errors.each do |error| %>
<li><%= error.full_message %></li>
<% end %>
</ul>
</div>
<% end %>
<div class="inline">
<%= form.submit class: "rounded-lg py-3 px-5 bg-blue-600 text-white inline-block font-medium cursor-pointer" %>
</div>
<% end %>

View file

@ -1,8 +0,0 @@
<div class="mx-auto md:w-2/3 w-full">
<h1 class="font-bold text-4xl">Editing point</h1>
<%= render "form", point: @point %>
<%= link_to "Show this point", @point, class: "ml-2 rounded-lg py-3 px-5 bg-gray-100 inline-block font-medium" %>
<%= link_to "Back to points", points_path, class: "ml-2 rounded-lg py-3 px-5 bg-gray-100 inline-block font-medium" %>
</div>

View file

@ -1,7 +0,0 @@
<div class="mx-auto md:w-2/3 w-full">
<h1 class="font-bold text-4xl">New point</h1>
<%= render "form", point: @point %>
<%= link_to "Back to points", points_path, class: "ml-2 rounded-lg py-3 px-5 bg-gray-100 inline-block font-medium" %>
</div>

View file

@ -1,15 +0,0 @@
<div class="mx-auto md:w-2/3 w-full flex">
<div class="mx-auto">
<% if notice.present? %>
<p class="py-2 px-3 bg-green-50 mb-5 text-green-500 font-medium rounded-lg inline-block" id="notice"><%= notice %></p>
<% end %>
<%= render @point %>
<%= link_to "Edit this point", edit_point_path(@point), class: "mt-2 rounded-lg py-3 px-5 bg-gray-100 inline-block font-medium" %>
<%= link_to "Back to points", points_path, class: "ml-2 rounded-lg py-3 px-5 bg-gray-100 inline-block font-medium" %>
<div class="inline-block ml-2">
<%= button_to "Destroy this point", @point, method: :delete, class: "mt-2 rounded-lg py-3 px-5 bg-gray-100 font-medium" %>
</div>
</div>
</div>

View file

@ -0,0 +1,7 @@
# frozen_string_literal: true
# Holder for a single, lazily created Redis connection.
module Reddis
  class << self
    # Returns the memoized client; on first use it is built from
    # ENV['REDIS_URL'].
    def client
      return @client if @client

      @client = Redis.new(url: ENV['REDIS_URL'])
    end
  end
end

25
lib/tasks/import.rake Normal file
View file

@ -0,0 +1,25 @@
# frozen_string_literal: true
# Usage: rake import:big_file['/path/to/file.json','user@email.com']
namespace :import do
  desc 'Accepts a file path and user email and imports the data into the database'
  # Streams a JSON file through StreamHandler on behalf of the given user.
  #
  # Arguments (rake task args):
  #   file_path  - path to the JSON file to read
  #   user_email - email used to look up the owning User
  #
  # Raises if the user or the file cannot be found, or if the Import record
  # fails to save.
  task :big_file, %i[file_path user_email] => :environment do |_, args|
    file_path = args[:file_path]

    user = User.find_by(email: args[:user_email])
    raise 'User not found' unless user

    # Check the file before creating the Import, so a mistyped path does not
    # leave an orphaned import record behind.
    raise 'File not found' unless file_path && File.file?(file_path)

    # create! surfaces validation errors instead of continuing with an
    # unsaved record whose id is nil.
    import = user.imports.create!(name: file_path, source: :google_records)
    handler = StreamHandler.new(import.id)

    # puts (not pp) so messages print as plain text rather than quoted inspect output.
    puts "Importing #{file_path} for #{user.email}, file size is #{File.size(file_path)}... This might take a while, have patience!"

    File.open(file_path, 'r') do |content|
      Oj.sc_parse(handler, content)
    end

    puts "Imported #{file_path} for #{user.email} successfully!"
  end
end

View file

@ -1,3 +1,5 @@
# frozen_string_literal: true
require 'rails_helper'
RSpec.describe ImportJob, type: :job do

View file

@ -13,6 +13,7 @@ RSpec.describe 'Exports', type: :request do
it 'returns http success' do
get '/export'
expect(response).to have_http_status(:success)
end
end

View file

@ -13,51 +13,51 @@ RSpec.describe OwnTracks::Params do
let(:expected_json) do
{
latitude: 40.7128,
longitude: -74.006,
battery_status: 'unknown',
battery: 85,
ping: nil,
altitude: 41,
accuracy: 8,
vertical_accuracy: 3,
velocity: nil,
connection: 'wifi',
ssid: 'Home Wifi',
bssid: 'b0:f2:8:45:94:33',
trigger: 'unknown',
tracker_id: 'RO',
timestamp: 1706965203,
inrids: ['5f1d1b'],
in_regions: ['home'],
topic: 'owntracks/test/iPhone 12 Pro',
raw_data: {
'batt'=>85,
'lon'=>-74.006,
'acc'=>8,
'bs'=>2,
'inrids'=>['5f1d1b'],
'BSSID'=>'b0:f2:8:45:94:33',
'SSID'=>'Home Wifi',
'vac'=>3,
'inregions'=>['home'],
'lat'=>40.7128,
'topic'=>'owntracks/test/iPhone 12 Pro',
't'=>'p',
'conn'=>'w',
'm'=>1,
'tst'=>1706965203,
'alt'=>41,
'_type'=>'location',
'tid'=>'RO',
'_http'=>true,
'ghash'=>'u33d773',
'isorcv'=>'2024-02-03T13:00:03Z',
'isotst'=>'2024-02-03T13:00:03Z',
'disptst'=>'2024-02-03 13:00:03'
}
latitude: 40.7128,
longitude: -74.006,
battery_status: 'unknown',
battery: 85,
ping: nil,
altitude: 41,
accuracy: 8,
vertical_accuracy: 3,
velocity: nil,
connection: 'wifi',
ssid: 'Home Wifi',
bssid: 'b0:f2:8:45:94:33',
trigger: 'unknown',
tracker_id: 'RO',
timestamp: 1706965203,
inrids: ['5f1d1b'],
in_regions: ['home'],
topic: 'owntracks/test/iPhone 12 Pro',
raw_data: {
'batt' => 85,
'lon' => -74.006,
'acc' => 8,
'bs' => 2,
'inrids' => ['5f1d1b'],
'BSSID' => 'b0:f2:8:45:94:33',
'SSID' => 'Home Wifi',
'vac' => 3,
'inregions' => ['home'],
'lat' => 40.7128,
'topic' => 'owntracks/test/iPhone 12 Pro',
't' => 'p',
'conn' => 'w',
'm' => 1,
'tst' => 1706965203,
'alt' => 41,
'_type' => 'location',
'tid' => 'RO',
'_http' => true,
'ghash' => 'u33d773',
'isorcv' => '2024-02-03T13:00:03Z',
'isotst' => '2024-02-03T13:00:03Z',
'disptst' => '2024-02-03 13:00:03'
}
end
}
end
it 'returns parsed params' do
expect(params).to eq(expected_json)

View file

@ -172,7 +172,7 @@ paths:
lat: 52.502397
lon: 13.356718
tid: Swagger
tst: 1716487940
tst: 1716488929
servers:
- url: http://{defaultHost}
variables: