Rework Google Phone Takeout import to support another file format of location-history.json

This commit is contained in:
Eugene Burmakin 2024-07-08 18:05:02 +02:00
parent 47f3e55849
commit e1648c5cb0
8 changed files with 297 additions and 69 deletions

View file

@ -5,12 +5,49 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).
## [0.8.4] — 2024-07-08
### Added
- Support for multiple hosts. Now you can specify the host of the application by setting the `APPLICATION_HOSTS` (note plural form) environment variable in the `docker-compose.yml` file. Example:
```yaml
dawarich_app:
image: freikin/dawarich:latest
container_name: dawarich_app
environment:
APPLICATION_HOSTS: "yourhost.com,www.yourhost.com,127.0.0.1"
```
Note, there should be no protocol prefixes in the `APPLICATION_HOSTS` variable, only the hostnames.
⚠️ It would also be better to migrate your current `APPLICATION_HOST` to `APPLICATION_HOSTS` to avoid any issues in the future, as `APPLICATION_HOST` will be deprecated in the near future. ⚠️
- Support for HTTPS. Now you can specify the protocol of the application by setting the `APPLICATION_PROTOCOL` environment variable in the `docker-compose.yml` file. Default value is `http`. Example:
```yaml
dawarich_app:
image: freikin/dawarich:latest
container_name: dawarich_app
environment:
APPLICATION_PROTOCOL: "https"
```
### Fixed
- Support for a `location-history.json` file from Google Takeout. It turned out, this file could contain not only an object with location data history, but also an array of objects with location data history. Now Dawarich can handle both cases and import the data correctly.
---
## [0.8.3] — 2024-07-03
### Added
- Notifications system. Now you will receive a notification when an import or export is finished, when stats update is completed and if any error occurs during any of these processes. Notifications are displayed in the top right corner of the screen and are stored in the database. You can see all your notifications on the Notifications page.
- Swagger API docs for /api/v1/owntracks/points You can find the API docs at `/api-docs`.
- Swagger API docs for `/api/v1/owntracks/points`. You can find the API docs at `/api-docs`.
---

View file

@ -4,8 +4,8 @@ class GoogleMaps::PhoneTakeoutParser
attr_reader :import, :user_id
def initialize(import, user_id)
@import = import
@user_id = user_id
@import = import
@user_id = user_id
end
def call
@ -15,23 +15,23 @@ class GoogleMaps::PhoneTakeoutParser
points_data.compact.each do |point_data|
next if Point.exists?(
timestamp: point_data[:timestamp],
latitude: point_data[:latitude],
longitude: point_data[:longitude],
timestamp: point_data[:timestamp],
latitude: point_data[:latitude],
longitude: point_data[:longitude],
user_id:
)
Point.create(
latitude: point_data[:latitude],
longitude: point_data[:longitude],
timestamp: point_data[:timestamp],
raw_data: point_data[:raw_data],
accuracy: point_data[:accuracy],
altitude: point_data[:altitude],
velocity: point_data[:velocity],
topic: 'Google Maps Phone Timeline Export',
latitude: point_data[:latitude],
longitude: point_data[:longitude],
timestamp: point_data[:timestamp],
raw_data: point_data[:raw_data],
accuracy: point_data[:accuracy],
altitude: point_data[:altitude],
velocity: point_data[:velocity],
import_id: import.id,
topic: 'Google Maps Phone Timeline Export',
tracker_id: 'google-maps-phone-timeline-export',
import_id: import.id,
user_id:
)
@ -47,49 +47,30 @@ class GoogleMaps::PhoneTakeoutParser
private
def parse_json
semantic_segments = import.raw_data['semanticSegments'].flat_map do |segment|
if segment.key?('timelinePath')
segment['timelinePath'].map do |point|
lat, lon = parse_coordinates(point['point'])
timestamp = DateTime.parse(point['time']).to_i
# location-history.json could contain an array of data points
# or an object with semanticSegments, rawSignals and rawArray
# I guess there are no easy ways with Google since these two are
# 3rd and 4th formats of their location data exports
semantic_segments = []
raw_signals = []
raw_array = []
point_hash(lat, lon, timestamp, segment)
end
elsif segment.key?('visit')
lat, lon = parse_coordinates(segment['visit']['topCandidate']['placeLocation']['latLng'])
timestamp = DateTime.parse(segment['startTime']).to_i
point_hash(lat, lon, timestamp, segment)
else # activities
# Some activities don't have start latLng
next if segment.dig('activity', 'start', 'latLng').nil?
start_lat, start_lon = parse_coordinates(segment['activity']['start']['latLng'])
start_timestamp = DateTime.parse(segment['startTime']).to_i
end_lat, end_lon = parse_coordinates(segment['activity']['end']['latLng'])
end_timestamp = DateTime.parse(segment['endTime']).to_i
[
point_hash(start_lat, start_lon, start_timestamp, segment),
point_hash(end_lat, end_lon, end_timestamp, segment)
]
end
if import.raw_data.is_a?(Array)
raw_array = parse_raw_array(import.raw_data)
else
semantic_segments = parse_semantic_segments(import.raw_data['semanticSegments']) if import.raw_data['semanticSegments']
raw_signals = parse_raw_signals(import.raw_data['rawSignals']) if import.raw_data['rawSignals']
end
raw_signals = import.raw_data['rawSignals'].flat_map do |segment|
next unless segment.dig('position', 'LatLng')
lat, lon = parse_coordinates(segment['position']['LatLng'])
timestamp = DateTime.parse(segment['position']['timestamp']).to_i
point_hash(lat, lon, timestamp, segment)
end
semantic_segments + raw_signals
semantic_segments + raw_signals + raw_array
end
# Parses a latitude/longitude pair out of either coordinate format Google uses:
#   "27.720022°, -97.347951°"  (degree-suffixed pair)
#   "geo:27.720022,-97.347951" (geo URI)
#
# @param coordinates [String]
# @return [Array(String, String)] [latitude, longitude] as strings
def parse_coordinates(coordinates)
  if coordinates.include?('°')
    coordinates.split(', ').map { _1.chomp('°') }
  else
    # delete_prefix removes only the leading "geo:" marker; String#delete
    # would strip every 'g'/'e'/'o'/':' character anywhere in the string.
    coordinates.delete_prefix('geo:').split(',')
  end
end
def point_hash(lat, lon, timestamp, raw_data)
@ -103,4 +84,101 @@ class GoogleMaps::PhoneTakeoutParser
velocitu: raw_data['speedMetersPerSecond']
}
end
# Builds a single point from an array-format "visit" entry, anchored at
# the visit's start time.
def parse_visit_place_location(data_point)
  visited_at = DateTime.parse(data_point['startTime']).to_i
  latitude, longitude = parse_coordinates(data_point['visit']['topCandidate']['placeLocation'])

  point_hash(latitude, longitude, visited_at, data_point)
end
# Builds two points (start and end of the movement) from an array-format
# "activity" entry.
def parse_activity(data_point)
  activity = data_point['activity']
  from_lat, from_lon = parse_coordinates(activity['start'])
  to_lat, to_lon = parse_coordinates(activity['end'])

  [
    point_hash(from_lat, from_lon, DateTime.parse(data_point['startTime']).to_i, data_point),
    point_hash(to_lat, to_lon, DateTime.parse(data_point['endTime']).to_i, data_point)
  ]
end
# Builds one point per entry of an array-format "timelinePath". Each path
# point carries an optional MINUTE offset from the segment's startTime
# ("durationMinutesOffsetFromStartTime"); a missing offset means the point
# sits at startTime itself.
#
# @param data_point [Hash] a location-history.json array entry
# @return [Array<Hash>] point hashes with integer epoch timestamps
def parse_timeline_path(data_point)
  # Parse the anchor time once; every path point offsets from it.
  # to_time.to_i yields epoch seconds in plain Ruby as well as under
  # ActiveSupport.
  start_time = DateTime.parse(data_point['startTime']).to_time.to_i

  data_point['timelinePath'].map do |point|
    lat, lon = parse_coordinates(point['point'])
    # Offsets are minutes, so convert to seconds. Adding the raw integer
    # to a DateTime (as before) would have advanced whole DAYS and leaked
    # a DateTime where every other code path stores an epoch integer.
    # A nil offset becomes 0, i.e. the segment's start time.
    timestamp = start_time + (point['durationMinutesOffsetFromStartTime'].to_i * 60)

    point_hash(lat, lon, timestamp, data_point)
  end
end
# Builds a single point from a semanticSegments "visit" entry, anchored at
# the visit's start time.
def parse_semantic_visit(segment)
  visited_at = DateTime.parse(segment['startTime']).to_i
  latitude, longitude = parse_coordinates(segment['visit']['topCandidate']['placeLocation']['latLng'])

  point_hash(latitude, longitude, visited_at, segment)
end
# Builds start and end points from a semanticSegments "activity" entry.
def parse_semantic_activity(segment)
  activity = segment['activity']
  from_lat, from_lon = parse_coordinates(activity['start']['latLng'])
  to_lat, to_lon = parse_coordinates(activity['end']['latLng'])

  [
    point_hash(from_lat, from_lon, DateTime.parse(segment['startTime']).to_i, segment),
    point_hash(to_lat, to_lon, DateTime.parse(segment['endTime']).to_i, segment)
  ]
end
# Builds one point per entry of a semanticSegments "timelinePath". Unlike
# the array format, each path point here carries its own absolute "time".
def parse_semantic_timeline_path(segment)
  segment['timelinePath'].map do |path_point|
    latitude, longitude = parse_coordinates(path_point['point'])
    recorded_at = DateTime.parse(path_point['time']).to_i

    point_hash(latitude, longitude, recorded_at, segment)
  end
end
# Converts the array-form location-history.json into a flat list of point
# hashes. Entries matching none of the known shapes (e.g. timelineMemory)
# are dropped.
def parse_raw_array(raw_data)
  points = raw_data.map do |data_point|
    if data_point.dig('visit', 'topCandidate', 'placeLocation')
      parse_visit_place_location(data_point)
    elsif data_point.dig('activity', 'start') && data_point.dig('activity', 'end')
      parse_activity(data_point)
    elsif data_point['timelinePath']
      parse_timeline_path(data_point)
    end
  end

  points.flatten.compact
end
# Converts the object-form semanticSegments into point hashes. Activities
# without a start latLng contribute nil entries, which the caller compacts.
def parse_semantic_segments(semantic_segments)
  semantic_segments.flat_map do |segment|
    if segment.key?('timelinePath')
      parse_semantic_timeline_path(segment)
    elsif segment.key?('visit')
      parse_semantic_visit(segment)
    elsif segment.dig('activity', 'start', 'latLng')
      # Everything else is treated as an activity, skipped when it lacks
      # a start latLng (the branch falls through to nil).
      parse_semantic_activity(segment)
    end
  end
end
# Converts object-form rawSignals entries into point hashes. Signals
# without a position fix contribute nil entries, compacted by the caller.
def parse_raw_signals(raw_signals)
  raw_signals.flat_map do |signal|
    coordinates = signal.dig('position', 'LatLng')
    next unless coordinates

    latitude, longitude = parse_coordinates(coordinates)
    recorded_at = DateTime.parse(signal['position']['timestamp']).to_i

    point_hash(latitude, longitude, recorded_at, signal)
  end
end
end

View file

@ -1,11 +1,9 @@
<div id="<%= dom_id import %>">
<table class="table">
<!-- head -->
<thead>
<tr>
<th>Name</th>
<th>Processed</th>
<th>Doubles</th>
<th>Imported points</th>
<th>Created at</th>
</tr>
</thead>
@ -15,10 +13,8 @@
<%= link_to import.name, import, class: 'underline hover:no-underline' %> (<%= import.source %>)
</td>
<td>
<%= "✅" if import.processed == import.raw_points %>
<%= "#{import.processed}/#{import.raw_points}" %>
<%= "#{number_with_delimiter import.points.size}" %>
</td>
<td><%= import.doubles %></td>
<td><%= import.created_at.strftime("%d.%m.%Y, %H:%M") %></td>
</tr>
</tbody>

View file

@ -1,4 +1,6 @@
require "active_support/core_ext/integer/time"
# frozen_string_literal: true
require 'active_support/core_ext/integer/time'
Rails.application.configure do
# Settings specified here will take precedence over those in config/application.rb.
@ -19,13 +21,13 @@ Rails.application.configure do
# Enable/disable caching. By default caching is disabled.
# Run rails dev:cache to toggle caching.
if Rails.root.join("tmp/caching-dev.txt").exist?
if Rails.root.join('tmp/caching-dev.txt').exist?
config.action_controller.perform_caching = true
config.action_controller.enable_fragment_cache_logging = true
config.cache_store = :redis_cache_store, { url: ENV['REDIS_URL'] }
config.public_file_server.headers = {
"Cache-Control" => "public, max-age=#{2.days.to_i}"
'Cache-Control' => "public, max-age=#{2.days.to_i}"
}
else
config.action_controller.perform_caching = false
@ -73,6 +75,11 @@ Rails.application.configure do
# Raise error when a before_action's only/except options reference missing actions
config.action_controller.raise_on_missing_callback_actions = true
config.action_mailer.default_url_options = { host: ENV.fetch("APPLICATION_HOST", "localhost"), port: 3000 }
config.hosts << ENV.fetch("APPLICATION_HOST", "localhost")
config.action_mailer.default_url_options = { host: ENV.fetch('APPLICATION_HOST', 'localhost'), port: 3000 }
config.hosts << ENV.fetch('APPLICATION_HOST', 'localhost')
hosts = ENV.fetch("APPLICATION_HOSTS", ['localhost'])
config.hosts.concat(hosts.split(',')) if hosts.present?
config.force_ssl = ENV.fetch('APPLICATION_PROTOCOL', 'http').downcase == 'https'
end

View file

@ -44,7 +44,9 @@ services:
DATABASE_NAME: dawarich_development
MIN_MINUTES_SPENT_IN_CITY: 60
APPLICATION_HOST: localhost
APPLICATION_HOSTS: localhost
TIME_ZONE: Europe/London
APPLICATION_PROTOCOL: http
depends_on:
- dawarich_db
- dawarich_redis
@ -69,7 +71,9 @@ services:
DATABASE_PASSWORD: password
DATABASE_NAME: dawarich_development
APPLICATION_HOST: localhost
APPLICATION_HOSTS: localhost
BACKGROUND_PROCESSING_CONCURRENCY: 10
APPLICATION_PROTOCOL: http
depends_on:
- dawarich_db
- dawarich_redis

View file

@ -0,0 +1,89 @@
[
{
"endTime" : "2023-08-27T17:04:26.999-05:00",
"startTime" : "2023-08-27T15:48:56.000-05:00",
"visit" : {
"hierarchyLevel" : "0",
"topCandidate" : {
"probability" : "0.785181",
"semanticType" : "Unknown",
"placeID" : "ChIJxxP_Qwb2aIYRTwDNDLkUmD0",
"placeLocation" : "geo:27.720022,-97.347951"
},
"probability" : "0.710000"
}
},
{
"endTime" : "2023-08-27T22:00:00.000Z",
"startTime" : "2023-08-27T20:00:00.000Z",
"timelinePath" : [
{
"point" : "geo:27.720007,-97.348044",
"durationMinutesOffsetFromStartTime" : "49"
}
]
},
{
"endTime" : "2023-09-02T23:25:59.000-06:00",
"startTime" : "2023-08-27T14:48:56.000-06:00",
"timelineMemory" : {
"destinations" : [
{
"identifier" : "ChIJs9KSYYBfaIYRj5AOiZNQ0a4"
},
{
"identifier" : "ChIJw6lCfj2sZ4YRl6q2LNNyojk"
},
{
"identifier" : "ChIJA89FstRIAYcRr9I2aBzR89A"
},
{
"identifier" : "ChIJtWVg4r5DFIcRr0zkOeDPEfY"
}
],
"distanceFromOriginKms" : "1594"
}
},
{
"endTime" : "2023-08-28T00:00:00.000Z",
"startTime" : "2023-08-27T22:00:00.000Z",
"timelinePath" : [
{
"point" : "geo:27.701123,-97.362988",
"durationMinutesOffsetFromStartTime" : "4"
},
{
"point" : "geo:27.701123,-97.362988",
"durationMinutesOffsetFromStartTime" : "4"
},
{
"point" : "geo:27.687173,-97.363743",
"durationMinutesOffsetFromStartTime" : "7"
},
{
"point" : "geo:27.686129,-97.381865",
"durationMinutesOffsetFromStartTime" : "10"
},
{
"point" : "geo:27.686129,-97.381865",
"durationMinutesOffsetFromStartTime" : "10"
},
{
"point" : "geo:27.686129,-97.381865",
"durationMinutesOffsetFromStartTime" : "108"
},
{
"point" : "geo:27.696576,-97.376949",
"durationMinutesOffsetFromStartTime" : "109"
},
{
"point" : "geo:27.709617,-97.375988",
"durationMinutesOffsetFromStartTime" : "112"
},
{
"point" : "geo:27.709617,-97.375988",
"durationMinutesOffsetFromStartTime" : "112"
}
]
}
]

View file

@ -1,3 +1,5 @@
// This file contains 3 doubles
{
"semanticSegments": [
{

View file

@ -7,13 +7,28 @@ RSpec.describe GoogleMaps::PhoneTakeoutParser do
subject(:parser) { described_class.new(import, user.id).call }
let(:user) { create(:user) }
let(:file_path) { Rails.root.join('spec/fixtures/files/google/phone-takeout.json') }
let(:raw_data) { JSON.parse(File.read(file_path)) }
let(:import) { create(:import, user:, name: 'phone_takeout.json', raw_data:) }
context 'when file exists' do
it 'creates points' do
expect { parser }.to change { Point.count }.by(4)
context 'when file content is an object' do
let(:file_path) { Rails.root.join('spec/fixtures/files/google/phone-takeout.json') }
let(:raw_data) { JSON.parse(File.read(file_path)) }
let(:import) { create(:import, user:, name: 'phone_takeout.json', raw_data:) }
context 'when file exists' do
it 'creates points' do
expect { parser }.to change { Point.count }.by(4)
end
end
end
context 'when file content is an array' do
let(:file_path) { Rails.root.join('spec/fixtures/files/google/location-history.json') }
let(:raw_data) { JSON.parse(File.read(file_path)) }
let(:import) { create(:import, user:, name: 'phone_takeout.json', raw_data:) }
context 'when file exists' do
it 'creates points' do
expect { parser }.to change { Point.count }.by(8)
end
end
end
end