Implement support for storing import files in S3.

This commit is contained in:
Eugene Burmakin 2025-03-23 22:00:41 +01:00
parent 477ef709c3
commit 1e54d87d53
8 changed files with 52 additions and 18 deletions

View file

@ -9,8 +9,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
## TODO:
- Migrate existing imports from `raw_data` to the new file storage.
- Delete import files when import is deleted.
- Stream import files for parsing instead of downloading them.
- Add a randomized name to import files before attaching them to the import record.
## Changed

View file

@ -5,6 +5,10 @@ git_source(:github) { |repo| "https://github.com/#{repo}.git" }
ruby File.read('.ruby-version').strip
# AWS SDK gems are pinned to specific patch series and are not auto-required by
# Bundler (require: false). See the thread below about the aws-sdk
# data-integrity change before bumping them.
# NOTE(review): presumably newer releases break uploads for this setup — confirm.
# https://meta.discourse.org/t/cant-rebuild-due-to-aws-sdk-gem-bump-and-new-aws-data-integrity-protections/354217/40
gem 'aws-sdk-s3', '~> 1.177.0', require: false
gem 'aws-sdk-core', '~> 3.215.1', require: false
gem 'aws-sdk-kms', '~> 1.96.0', require: false
gem 'bootsnap', require: false
gem 'chartkick'
gem 'data_migrate'

View file

@ -79,6 +79,22 @@ GEM
public_suffix (>= 2.0.2, < 7.0)
ast (2.4.2)
attr_extras (7.1.0)
aws-eventstream (1.3.2)
aws-partitions (1.1072.0)
aws-sdk-core (3.215.1)
aws-eventstream (~> 1, >= 1.3.0)
aws-partitions (~> 1, >= 1.992.0)
aws-sigv4 (~> 1.9)
jmespath (~> 1, >= 1.6.1)
aws-sdk-kms (1.96.0)
aws-sdk-core (~> 3, >= 3.210.0)
aws-sigv4 (~> 1.5)
aws-sdk-s3 (1.177.0)
aws-sdk-core (~> 3, >= 3.210.0)
aws-sdk-kms (~> 1)
aws-sigv4 (~> 1.5)
aws-sigv4 (1.11.0)
aws-eventstream (~> 1, >= 1.0.2)
base64 (0.2.0)
bcrypt (3.1.20)
benchmark (0.4.0)
@ -91,7 +107,6 @@ GEM
coderay (1.1.3)
concurrent-ruby (1.3.5)
connection_pool (2.5.0)
content_disposition (1.0.0)
crack (1.0.0)
bigdecimal
rexml
@ -121,8 +136,6 @@ GEM
dotenv-rails (3.1.7)
dotenv (= 3.1.7)
railties (>= 6.1)
down (5.4.2)
addressable (~> 2.8)
drb (2.2.1)
erubi (1.13.1)
et-orbi (1.2.11)
@ -164,6 +177,7 @@ GEM
pp (>= 0.6.0)
rdoc (>= 4.0.0)
reline (>= 0.4.2)
jmespath (1.6.2)
json (2.10.1)
json-schema (5.0.1)
addressable (~> 2.8)
@ -371,9 +385,6 @@ GEM
securerandom (0.4.1)
shoulda-matchers (6.4.0)
activesupport (>= 5.2.0)
shrine (3.6.0)
content_disposition (~> 1.0)
down (~> 5.1)
sidekiq (7.3.9)
base64
connection_pool (>= 2.3.0)
@ -453,6 +464,9 @@ PLATFORMS
DEPENDENCIES
activerecord-postgis-adapter
aws-sdk-core (~> 3.215.1)
aws-sdk-kms (~> 1.96.0)
aws-sdk-s3 (~> 1.177.0)
bootsnap
chartkick
data_migrate
@ -488,7 +502,6 @@ DEPENDENCIES
rswag-ui
rubocop-rails
shoulda-matchers
shrine (~> 3.6)
sidekiq
sidekiq-cron
sidekiq-limit_fetch

View file

@ -9,6 +9,7 @@ class Import < ApplicationRecord
# The uploaded import file, attached through Active Storage.
has_one_attached :file
# Enqueue processing only after the new record has been committed.
after_commit -> { Import::ProcessJob.perform_later(id) }, on: :create
# Remove the attached file once the record's deletion has been committed.
# NOTE(review): has_one_attached already purges the attachment on destroy by
# default (dependent: :purge_later) — confirm this explicit purge is still needed.
after_commit -> { file.purge }, on: :destroy
enum :source, {
google_semantic_history: 0, owntracks: 1, google_records: 2,

View file

@ -63,10 +63,14 @@ class GoogleMaps::SemanticHistoryParser
end
# Parses the import's attached JSON file into a flat list of timeline entries.
#
# The attachment is downloaded in full before parsing. The block form of
# `download` streams the file in chunks and does not return the block's
# result, so parsing inside that block would hand `Oj.load` a partial document
# and lose the parsed entries.
#
# @return [Array] the non-nil results of #parse_timeline_object for every
#   element under the 'timelineObjects' key, flattened one level
def parse_json
  json = Oj.load(import.file.download)

  json['timelineObjects'].flat_map do |timeline_object|
    parse_timeline_object(timeline_object)
  end.compact
end
def parse_timeline_object(timeline_object)
if timeline_object['activitySegment'].present?

View file

@ -98,4 +98,6 @@ Rails.application.configure do
config.logger = Logger.new($stdout)
config.lograge.enabled = true
config.lograge.formatter = Lograge::Formatters::Json.new
# Self-hosted instances (SELF_HOSTED set to exactly 'true') keep Active Storage
# files on the local disk service; every other deployment uses the s3 service.
config.active_storage.service = ENV['SELF_HOSTED'] == 'true' ? :local : :s3
end

View file

@ -0,0 +1,11 @@
# frozen_string_literal: true
require 'aws-sdk-core'

# Global AWS SDK settings, read from the environment when the app boots.
aws_credentials = Aws::Credentials.new(
  ENV['AWS_ACCESS_KEY_ID'],
  ENV['AWS_SECRET_ACCESS_KEY']
)

Aws.config.update(
  region: ENV['AWS_REGION'],
  endpoint: ENV['AWS_ENDPOINT'],
  credentials: aws_credentials
)

View file

@ -6,13 +6,12 @@ local:
service: Disk
root: <%= Rails.root.join("storage") %>
# Use bin/rails credentials:edit to set the AWS secrets (as aws:access_key_id|secret_access_key)
# amazon:
# service: S3
# access_key_id: <%= Rails.application.credentials.dig(:aws, :access_key_id) %>
# secret_access_key: <%= Rails.application.credentials.dig(:aws, :secret_access_key) %>
# region: us-east-1
# bucket: your_own_bucket-<%= Rails.env %>
# Amazon S3 service, configured from environment variables.
# This file is rendered through ERB in full at boot, even when another service
# (e.g. local) is selected, so lookups fall back to nil instead of raising
# KeyError when the AWS_* variables are not set.
s3:
  service: S3
  access_key_id: <%= ENV.fetch("AWS_ACCESS_KEY_ID", nil) %>
  secret_access_key: <%= ENV.fetch("AWS_SECRET_ACCESS_KEY", nil) %>
  region: <%= ENV.fetch("AWS_REGION", nil) %>
  bucket: <%= ENV.fetch("AWS_BUCKET", nil) %>
# Remember not to check in your GCS keyfile to a repository
# google: