mirror of
https://github.com/Freika/dawarich.git
synced 2026-01-10 17:21:38 -05:00
Raw data archivation
This commit is contained in:
parent
44d8aee468
commit
9d93e5df7c
15 changed files with 3055 additions and 1 deletions
2259
RAW_DATA_ARCHIVAL_PLAN.md
Normal file
2259
RAW_DATA_ARCHIVAL_PLAN.md
Normal file
File diff suppressed because it is too large
Load diff
19
app/jobs/points/raw_data/archive_job.rb
Normal file
19
app/jobs/points/raw_data/archive_job.rb
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Points
  module RawData
    # Background job that runs the raw_data archiver once and logs its stats.
    #
    # Any StandardError is logged, reported to Sentry when Sentry is loaded,
    # and then re-raised so the job backend sees the failure.
    class ArchiveJob < ApplicationJob
      queue_as :default

      # @return [void]
      # @raise [StandardError] whatever the archiver raised, after logging it
      def perform
        result = Points::RawData::Archiver.new.call

        Rails.logger.info("Archive job complete: #{result}")
      rescue StandardError => error
        Rails.logger.error("Archive job failed: #{error.message}")
        Sentry.capture_exception(error) if defined?(Sentry)
        raise
      end
    end
  end
end
|
||||
19
app/jobs/points/raw_data/re_archive_month_job.rb
Normal file
19
app/jobs/points/raw_data/re_archive_month_job.rb
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Points
  module RawData
    # Background job that re-runs archival for a single user/month, e.g. after
    # a retrospective import added points to a month that was already archived.
    #
    # Any StandardError is logged, reported to Sentry when Sentry is loaded,
    # and then re-raised so the job backend sees the failure.
    class ReArchiveMonthJob < ApplicationJob
      queue_as :default

      # @param user_id [Integer] owner of the points
      # @param year [Integer] calendar year of the month to archive
      # @param month [Integer] calendar month (1-12)
      # @return [void]
      # @raise [StandardError] whatever the archiver raised, after logging it
      def perform(user_id, year, month)
        Rails.logger.info("Re-archiving #{user_id}/#{year}/#{month} (retrospective import)")

        archiver = Points::RawData::Archiver.new
        archiver.archive_specific_month(user_id, year, month)
      rescue StandardError => error
        Rails.logger.error("Re-archive failed: #{error.message}")
        Sentry.capture_exception(error) if defined?(Sentry)
        raise
      end
    end
  end
end
|
||||
100
app/models/concerns/archivable.rb
Normal file
100
app/models/concerns/archivable.rb
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
# Mixin for models whose `raw_data` column may have been moved out of the
# database into a gzip-compressed JSONL archive (Points::RawDataArchive).
#
# The including model is expected to provide `raw_data`, `raw_data_archived`,
# `raw_data_archive_id`, `id` and `user_id`.
module Archivable
  extend ActiveSupport::Concern

  included do
    # Associations
    belongs_to :raw_data_archive,
               class_name: 'Points::RawDataArchive',
               foreign_key: :raw_data_archive_id,
               optional: true

    # Scopes
    scope :archived, -> { where(raw_data_archived: true) }
    scope :not_archived, -> { where(raw_data_archived: false) }
    # Preloads the archive plus its ActiveStorage attachment and blob.
    scope :with_archived_raw_data, lambda {
      includes(raw_data_archive: { file_attachment: :blob })
    }
  end

  # Main accessor: returns raw_data, falling back to the archive.
  # Use this instead of `raw_data` whenever archived data must be visible.
  #
  # @return [Object] the in-database raw_data when it is present or the record
  #   is not flagged as archived; otherwise the archived value, or `{}` when it
  #   cannot be found or fetching fails
  def raw_data_with_archive
    return raw_data if raw_data.present? || !raw_data_archived?

    fetch_archived_raw_data
  end

  # Alias for convenience (optional)
  alias_method :archived_raw_data, :raw_data_with_archive

  # Writes `value` back into the raw_data column and clears the archive flags.
  #
  # @param value [Object] the raw_data payload to store
  # @raise [ActiveRecord::RecordInvalid] if the update fails validation
  def restore_raw_data!(value)
    update!(
      raw_data: value,
      raw_data_archived: false,
      raw_data_archive_id: nil
    )
  end

  # Cache key used for the long-term (1 day) archive cache.
  #
  # @return [String]
  def archive_cache_key
    "raw_data:archive:#{self.class.name.underscore}:#{id}"
  end

  private

  # Looks the value up in the temporary restore cache, then in the long-term
  # cache, and finally in the archive file. Any error degrades to `{}`.
  def fetch_archived_raw_data
    # Check temporary restore cache first (for migrations)
    cached = check_temporary_restore_cache
    return cached if cached

    # Check long-term cache (1 day TTL)
    Rails.cache.fetch(archive_cache_key, expires_in: 1.day) do
      fetch_from_archive_file
    end
  rescue StandardError => e
    handle_archive_fetch_error(e)
  end

  # Reads the per-month temporary cache entry written by the restorer.
  def check_temporary_restore_cache
    return nil unless respond_to?(:timestamp)

    # NOTE(review): Time.at uses the process-local time zone; the year/month
    # derived here must match the zone used when the cache key was written
    # (the archive's year/month) - confirm for non-UTC hosts.
    recorded_time = Time.at(timestamp)
    cache_key = "raw_data:temp:#{user_id}:#{recorded_time.year}:#{recorded_time.month}:#{id}"
    Rails.cache.read(cache_key)
  end

  # Downloads the attached archive and scans its JSONL lines for this record.
  #
  # @return [Object] the archived raw_data, or `{}` when no file is attached,
  #   the record is not in the file, or the stored value is nil
  def fetch_from_archive_file
    return {} unless raw_data_archive&.file&.attached?

    compressed_content = raw_data_archive.file.blob.download

    # Block form closes the reader even when parsing raises or we return early.
    Zlib::GzipReader.wrap(StringIO.new(compressed_content)) do |gz|
      gz.each_line do |line|
        data = JSON.parse(line)
        return data['raw_data'] || {} if data['id'] == id
      end
    end

    {}
  end

  # Logs and reports the failure, then degrades gracefully to an empty hash.
  def handle_archive_fetch_error(error)
    Rails.logger.error(
      "Failed to fetch archived raw_data for #{self.class.name} #{id}: #{error.message}"
    )
    Sentry.capture_exception(error) if defined?(Sentry)

    {} # Graceful degradation
  end
end
|
||||
|
|
@ -3,6 +3,7 @@
|
|||
class Point < ApplicationRecord
|
||||
include Nearable
|
||||
include Distanceable
|
||||
include Archivable
|
||||
|
||||
belongs_to :import, optional: true, counter_cache: true
|
||||
belongs_to :visit, optional: true
|
||||
|
|
|
|||
48
app/models/points/raw_data_archive.rb
Normal file
48
app/models/points/raw_data_archive.rb
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Points
  # One archive chunk: the raw_data of a set of a user's points for a given
  # year/month, stored as an ActiveStorage attachment (`file`).
  class RawDataArchive < ApplicationRecord
    self.table_name = 'points_raw_data_archives'

    belongs_to :user
    has_many :points, foreign_key: :raw_data_archive_id, dependent: :nullify

    has_one_attached :file

    validates :year, :month, :chunk_number, :point_count, presence: true
    validates :year, numericality: { greater_than: 1970, less_than: 2100 }
    validates :month, numericality: { greater_than_or_equal_to: 1, less_than_or_equal_to: 12 }
    validates :chunk_number, numericality: { greater_than: 0 }
    validates :point_ids_checksum, presence: true

    # The file is attached after the record is created, so it is only
    # required on update.
    validate :file_must_be_attached, on: :update

    # All chunks for one user/month, in chunk order.
    scope :for_month, lambda { |user_id, year, month|
      where(user_id: user_id, year: year, month: month).order(:chunk_number)
    }

    scope :recent, -> { where('archived_at > ?', 30.days.ago) }
    scope :old, -> { where('archived_at < ?', 1.year.ago) }

    # @return [String] month name and year, e.g. "June 2024"
    def month_display
      first_day = Date.new(year, month, 1)
      first_day.strftime('%B %Y')
    end

    # @return [String] canonical file name for this chunk
    def filename
      padded_month = format('%02d', month)
      padded_chunk = format('%03d', chunk_number)

      "raw_data_#{user_id}_#{year}_#{padded_month}_chunk#{padded_chunk}.jsonl.gz"
    end

    # @return [Numeric] attachment size in MiB rounded to 2 decimals, 0 if no file
    def size_mb
      return 0 unless file.attached?

      bytes = file.blob.byte_size
      (bytes / 1024.0 / 1024.0).round(2)
    end

    private

    def file_must_be_attached
      return if file.attached?

      errors.add(:file, 'must be attached')
    end
  end
end
|
||||
|
|
@ -20,6 +20,7 @@ class User < ApplicationRecord # rubocop:disable Metrics/ClassLength
|
|||
has_many :tags, dependent: :destroy
|
||||
has_many :trips, dependent: :destroy
|
||||
has_many :tracks, dependent: :destroy
|
||||
has_many :raw_data_archives, class_name: 'Points::RawDataArchive', dependent: :destroy
|
||||
|
||||
after_create :create_api_key
|
||||
after_commit :activate, on: :create, if: -> { DawarichSettings.self_hosted? }
|
||||
|
|
|
|||
155
app/services/points/raw_data/archiver.rb
Normal file
155
app/services/points/raw_data/archiver.rb
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Points
  module RawData
    # Moves `points.raw_data` for sufficiently old months out of the database
    # into gzip-compressed JSONL chunks (Points::RawDataArchive + ActiveStorage)
    # and NULLs the column on the archived rows.
    class Archiver
      # Months newer than this (relative to the start of the current month)
      # are never archived by #call.
      SAFE_ARCHIVE_LAG = 2.months

      def initialize
        @stats = { processed: 0, archived: 0, failed: 0 }
      end

      # Archives every eligible user/month. Does nothing unless the
      # ARCHIVE_RAW_DATA environment variable is exactly "true".
      #
      # @return [Hash] counters with keys :processed, :archived, :failed
      def call
        unless archival_enabled?
          Rails.logger.info('Raw data archival disabled (ARCHIVE_RAW_DATA != "true")')
          return @stats
        end

        Rails.logger.info('Starting points raw_data archival...')

        # `each`, not `find_each`: the relation is a GROUP BY aggregate without
        # the primary key, which batch iteration needs for ordering and paging.
        archivable_months.each do |month_data|
          process_month(month_data)
        end

        Rails.logger.info("Archival complete: #{@stats}")
        @stats
      end

      # Archives one specific user/month regardless of SAFE_ARCHIVE_LAG and
      # of the ARCHIVE_RAW_DATA switch.
      #
      # @param user_id [Integer]
      # @param year [Integer]
      # @param month [Integer] 1-12
      def archive_specific_month(user_id, year, month)
        month_data = {
          'user_id' => user_id,
          'year' => year,
          'month' => month
        }

        process_month(month_data)
      end

      private

      def archival_enabled?
        ENV['ARCHIVE_RAW_DATA'] == 'true'
      end

      # One row per (user_id, year, month) that still has unarchived points
      # older than the safe cutoff. Rows respond to ['user_id'], ['year'],
      # ['month'].
      def archivable_months
        # Only months 2+ months old with unarchived points
        safe_cutoff = Date.current.beginning_of_month - SAFE_ARCHIVE_LAG

        Point.select(
          'user_id',
          'EXTRACT(YEAR FROM to_timestamp(timestamp))::int as year',
          'EXTRACT(MONTH FROM to_timestamp(timestamp))::int as month',
          'COUNT(*) as unarchived_count'
        ).where(raw_data_archived: false)
             .where('to_timestamp(timestamp) < ?', safe_cutoff)
             .group('user_id, EXTRACT(YEAR FROM to_timestamp(timestamp)), EXTRACT(MONTH FROM to_timestamp(timestamp))')
      end

      # Archives one month under an advisory lock; failures are logged,
      # reported and counted instead of propagating.
      def process_month(month_data)
        user_id = month_data['user_id']
        year = month_data['year']
        month = month_data['month']

        lock_key = "archive_points:#{user_id}:#{year}:#{month}"

        # Advisory lock prevents duplicate processing. With timeout_seconds: 0
        # the call returns false (rather than raising) when the lock is held
        # elsewhere, so the block's own result is made explicitly truthy.
        lock_acquired = ActiveRecord::Base.with_advisory_lock(lock_key, timeout_seconds: 0) do
          archive_month(user_id, year, month)
          @stats[:processed] += 1
          true
        end

        Rails.logger.info("Skipping #{lock_key} - already locked") unless lock_acquired
      rescue StandardError => e
        Rails.logger.error("Archive failed for #{user_id}/#{year}/#{month}: #{e.message}")
        Sentry.capture_exception(e) if defined?(Sentry)
        @stats[:failed] += 1
      end

      # Writes one archive chunk for the month's unarchived points, then flags
      # those points as archived and NULLs their raw_data.
      def archive_month(user_id, year, month)
        # Month boundaries in UTC, matching the to_timestamp()-based grouping
        # used to discover months (assumes the DB session time zone is UTC,
        # which is what Rails configures by default).
        start_of_month = Time.utc(year, month, 1).to_i
        end_of_month = (Time.utc(year, month, 1) + 1.month).to_i

        # Find unarchived points for this month
        points = Point.where(
          user_id: user_id,
          raw_data_archived: false
        ).where(timestamp: start_of_month...end_of_month)
                      .where.not(raw_data: nil) # Skip already-NULLed points

        return if points.empty?

        point_ids = points.pluck(:id)

        Rails.logger.info("Archiving #{point_ids.count} points for user #{user_id}, #{year}-#{format('%02d', month)}")

        # Pin the archive contents to exactly the ids that will be NULLed
        # below, so rows inserted after the pluck cannot slip into the file.
        archive = create_archive_chunk(user_id, year, month, Point.where(id: point_ids), point_ids)

        # Atomically mark points and NULL raw_data
        Point.transaction do
          Point.where(id: point_ids).update_all(
            raw_data_archived: true,
            raw_data_archive_id: archive.id,
            raw_data: nil # Reclaim space!
          )
        end

        @stats[:archived] += point_ids.count

        Rails.logger.info("✓ Archived chunk #{archive.chunk_number} (#{archive.size_mb} MB)")
      end

      # Creates the archive record for the next chunk number and attaches the
      # compressed payload.
      #
      # @return [Points::RawDataArchive]
      def create_archive_chunk(user_id, year, month, points, point_ids)
        # Determine chunk number (append-only)
        chunk_number = Points::RawDataArchive
                       .where(user_id: user_id, year: year, month: month)
                       .maximum(:chunk_number).to_i + 1

        # Compress points data
        compressed_data = Points::RawData::ChunkCompressor.new(points).compress

        # Create archive record
        archive = Points::RawDataArchive.create!(
          user_id: user_id,
          year: year,
          month: month,
          chunk_number: chunk_number,
          point_count: point_ids.count,
          point_ids_checksum: calculate_checksum(point_ids),
          archived_at: Time.current,
          metadata: {
            format_version: 1,
            compression: 'gzip',
            archived_by: 'Points::RawData::Archiver'
          }
        )

        # Attach compressed file via ActiveStorage; the model owns the
        # canonical file name format.
        archive.file.attach(
          io: StringIO.new(compressed_data),
          filename: archive.filename,
          content_type: 'application/gzip'
        )

        archive
      end

      # SHA-256 over the sorted, comma-joined ids.
      def calculate_checksum(point_ids)
        Digest::SHA256.hexdigest(point_ids.sort.join(','))
      end
    end
  end
end
|
||||
25
app/services/points/raw_data/chunk_compressor.rb
Normal file
25
app/services/points/raw_data/chunk_compressor.rb
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Points
  module RawData
    # Serialises a points relation to gzip-compressed JSONL: one
    # `{"id": ..., "raw_data": ...}` object per line.
    class ChunkCompressor
      # @param points_relation [#select] relation-like object; the result of
      #   `select(:id, :raw_data)` must respond to `find_each(batch_size:)`
      def initialize(points_relation)
        @points = points_relation
      end

      # @return [String] the gzip-compressed bytes
      def compress
        buffer = StringIO.new
        writer = Zlib::GzipWriter.new(buffer)

        # Iterate in batches so the rows are not all loaded at once.
        rows = @points.select(:id, :raw_data)
        rows.find_each(batch_size: 1000) do |row|
          writer.puts(jsonl_entry(row))
        end

        writer.close
        buffer.string
      end

      private

      # One JSONL line (without the trailing newline) for a single row.
      def jsonl_entry(row)
        { id: row.id, raw_data: row.raw_data }.to_json
      end
    end
  end
end
|
||||
106
app/services/points/raw_data/restorer.rb
Normal file
106
app/services/points/raw_data/restorer.rb
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Points
  module RawData
    # Reads archive chunks back, either into the `points.raw_data` column or
    # into Rails.cache for temporary access.
    class Restorer
      # Restores every chunk of one user/month into the database inside a
      # single transaction and clears the archive flags on the points.
      #
      # @raise [RuntimeError] when no archives exist for the month
      def restore_to_database(user_id, year, month)
        archives = month_archives!(user_id, year, month)

        Rails.logger.info("Restoring #{archives.count} archives to database...")

        Point.transaction do
          archives.each { |archive| restore_archive_to_db(archive) }
        end

        Rails.logger.info("✓ Restored #{archives.sum(:point_count)} points")
      end

      # Loads every chunk of one user/month into Rails.cache under
      # "raw_data:temp:<user_id>:<year>:<month>:<point id>" for 1 hour.
      #
      # @raise [RuntimeError] when no archives exist for the month
      def restore_to_memory(user_id, year, month)
        archives = month_archives!(user_id, year, month)

        Rails.logger.info("Loading #{archives.count} archives into cache...")

        cache_key_prefix = "raw_data:temp:#{user_id}:#{year}:#{month}"
        count = 0
        archives.each do |archive|
          count += restore_archive_to_cache(archive, cache_key_prefix)
        end

        Rails.logger.info("✓ Loaded #{count} points into cache (expires in 1 hour)")
      end

      # Restores every archived month of a user into the database, oldest first.
      def restore_all_for_user(user_id)
        months = Points::RawDataArchive.where(user_id: user_id)
                                       .select(:year, :month)
                                       .distinct
                                       .order(:year, :month)

        Rails.logger.info("Restoring #{months.count} months for user #{user_id}...")

        months.each do |entry|
          restore_to_database(user_id, entry.year, entry.month)
        end

        Rails.logger.info("✓ Complete user restore finished")
      end

      private

      # Archives for the month, or an error when there are none.
      def month_archives!(user_id, year, month)
        archives = Points::RawDataArchive.for_month(user_id, year, month)
        raise "No archives found for user #{user_id}, #{year}-#{month}" if archives.empty?

        archives
      end

      # Writes each archived entry back onto its point, one UPDATE per line.
      def restore_archive_to_db(archive)
        download_and_decompress(archive).each_line do |line|
          entry = JSON.parse(line)

          Point.where(id: entry['id']).update_all(
            raw_data: entry['raw_data'],
            raw_data_archived: false,
            raw_data_archive_id: nil
          )
        end
      end

      # Writes each archived entry into the cache.
      #
      # @return [Integer] number of entries written
      def restore_archive_to_cache(archive, cache_key_prefix)
        written = 0

        download_and_decompress(archive).each_line do |line|
          entry = JSON.parse(line)
          key = "#{cache_key_prefix}:#{entry['id']}"

          Rails.cache.write(key, entry['raw_data'], expires_in: 1.hour)
          written += 1
        end

        written
      end

      # Downloads the attachment via ActiveStorage and gunzips it.
      #
      # @return [String] the decompressed JSONL text
      # @raise [StandardError] re-raised after logging
      def download_and_decompress(archive)
        compressed_content = archive.file.blob.download

        # wrap with a block reads the stream and closes the reader afterwards.
        Zlib::GzipReader.wrap(StringIO.new(compressed_content), &:read)
      rescue StandardError => error
        Rails.logger.error("Failed to download/decompress archive #{archive.id}: #{error.message}")
        raise
      end
    end
  end
end
|
||||
23
db/migrate/20251206000001_create_points_raw_data_archives.rb
Normal file
23
db/migrate/20251206000001_create_points_raw_data_archives.rb
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
# Creates the table holding one row per archived raw_data chunk
# (user, year, month, chunk number) plus bookkeeping columns.
class CreatePointsRawDataArchives < ActiveRecord::Migration[8.0]
  def change
    create_table :points_raw_data_archives do |t|
      t.bigint   :user_id, null: false
      t.integer  :year, null: false
      t.integer  :month, null: false
      t.integer  :chunk_number, null: false, default: 1
      t.integer  :point_count, null: false
      t.string   :point_ids_checksum, null: false
      t.jsonb    :metadata, default: {}, null: false
      t.datetime :archived_at, null: false

      t.timestamps
    end

    add_index :points_raw_data_archives, :user_id
    add_index :points_raw_data_archives, %i[user_id year month]
    add_index :points_raw_data_archives, :archived_at

    # Added unvalidated here; a separate migration validates it.
    add_foreign_key :points_raw_data_archives, :users, validate: false
  end
end
|
||||
22
db/migrate/20251206000002_add_archival_columns_to_points.rb
Normal file
22
db/migrate/20251206000002_add_archival_columns_to_points.rb
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
# Adds the archive flag and archive reference to points, with concurrently
# built indexes (hence no wrapping DDL transaction).
class AddArchivalColumnsToPoints < ActiveRecord::Migration[8.0]
  disable_ddl_transaction!

  def change
    add_column :points, :raw_data_archived, :boolean, default: false, null: false
    add_column :points, :raw_data_archive_id, :bigint, null: true

    # Partial index: only rows already archived.
    add_index :points, :raw_data_archived,
              where: 'raw_data_archived = true',
              name: 'index_points_on_archived_true',
              algorithm: :concurrently
    add_index :points, :raw_data_archive_id, algorithm: :concurrently

    # Don't delete points if archive deleted; the constraint is added
    # unvalidated and validated by a separate migration.
    add_foreign_key :points, :points_raw_data_archives,
                    column: :raw_data_archive_id,
                    on_delete: :nullify,
                    validate: false
  end
end
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
# Validates the two foreign keys that the archival migrations added with
# `validate: false`.
class ValidateArchivalForeignKeys < ActiveRecord::Migration[8.0]
  def change
    [
      %i[points_raw_data_archives users],
      %i[points points_raw_data_archives]
    ].each do |from_table, to_table|
      validate_foreign_key from_table, to_table
    end
  end
end
|
||||
27
db/schema.rb
generated
27
db/schema.rb
generated
|
|
@ -10,7 +10,7 @@
|
|||
#
|
||||
# It's strongly recommended that you check this file into your version control system.
|
||||
|
||||
ActiveRecord::Schema[8.0].define(version: 2025_12_01_192510) do
|
||||
ActiveRecord::Schema[8.0].define(version: 2025_12_06_000004) do
|
||||
# These are extensions that must be enabled in order to support this database
|
||||
enable_extension "pg_catalog.plpgsql"
|
||||
enable_extension "postgis"
|
||||
|
|
@ -224,6 +224,10 @@ ActiveRecord::Schema[8.0].define(version: 2025_12_01_192510) do
|
|||
t.bigint "country_id"
|
||||
t.bigint "track_id"
|
||||
t.string "country_name"
|
||||
t.boolean "raw_data_archived", default: false, null: false
|
||||
t.bigint "raw_data_archive_id"
|
||||
t.integer "timestamp_year"
|
||||
t.integer "timestamp_month"
|
||||
t.index ["altitude"], name: "index_points_on_altitude"
|
||||
t.index ["battery"], name: "index_points_on_battery"
|
||||
t.index ["battery_status"], name: "index_points_on_battery_status"
|
||||
|
|
@ -238,6 +242,8 @@ ActiveRecord::Schema[8.0].define(version: 2025_12_01_192510) do
|
|||
t.index ["latitude", "longitude"], name: "index_points_on_latitude_and_longitude"
|
||||
t.index ["lonlat", "timestamp", "user_id"], name: "index_points_on_lonlat_timestamp_user_id", unique: true
|
||||
t.index ["lonlat"], name: "index_points_on_lonlat", using: :gist
|
||||
t.index ["raw_data_archive_id"], name: "index_points_on_raw_data_archive_id"
|
||||
t.index ["raw_data_archived"], name: "index_points_on_archived_true", where: "(raw_data_archived = true)"
|
||||
t.index ["reverse_geocoded_at"], name: "index_points_on_reverse_geocoded_at"
|
||||
t.index ["timestamp"], name: "index_points_on_timestamp"
|
||||
t.index ["track_id"], name: "index_points_on_track_id"
|
||||
|
|
@ -245,10 +251,27 @@ ActiveRecord::Schema[8.0].define(version: 2025_12_01_192510) do
|
|||
t.index ["user_id", "country_name"], name: "idx_points_user_country_name"
|
||||
t.index ["user_id", "reverse_geocoded_at"], name: "index_points_on_user_id_and_reverse_geocoded_at", where: "(reverse_geocoded_at IS NOT NULL)"
|
||||
t.index ["user_id", "timestamp", "track_id"], name: "idx_points_track_generation"
|
||||
t.index ["user_id", "timestamp_year", "timestamp_month", "raw_data_archived"], name: "index_points_on_user_time_archived"
|
||||
t.index ["user_id"], name: "index_points_on_user_id"
|
||||
t.index ["visit_id"], name: "index_points_on_visit_id"
|
||||
end
|
||||
|
||||
create_table "points_raw_data_archives", force: :cascade do |t|
|
||||
t.bigint "user_id", null: false
|
||||
t.integer "year", null: false
|
||||
t.integer "month", null: false
|
||||
t.integer "chunk_number", default: 1, null: false
|
||||
t.integer "point_count", null: false
|
||||
t.string "point_ids_checksum", null: false
|
||||
t.jsonb "metadata", default: {}, null: false
|
||||
t.datetime "archived_at", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["archived_at"], name: "index_points_raw_data_archives_on_archived_at"
|
||||
t.index ["user_id", "year", "month"], name: "index_points_raw_data_archives_on_user_id_and_year_and_month"
|
||||
t.index ["user_id"], name: "index_points_raw_data_archives_on_user_id"
|
||||
end
|
||||
|
||||
create_table "stats", force: :cascade do |t|
|
||||
t.integer "year", null: false
|
||||
t.integer "month", null: false
|
||||
|
|
@ -384,8 +407,10 @@ ActiveRecord::Schema[8.0].define(version: 2025_12_01_192510) do
|
|||
add_foreign_key "notifications", "users"
|
||||
add_foreign_key "place_visits", "places"
|
||||
add_foreign_key "place_visits", "visits"
|
||||
add_foreign_key "points", "points_raw_data_archives", column: "raw_data_archive_id", on_delete: :nullify
|
||||
add_foreign_key "points", "users"
|
||||
add_foreign_key "points", "visits"
|
||||
add_foreign_key "points_raw_data_archives", "users"
|
||||
add_foreign_key "stats", "users"
|
||||
add_foreign_key "taggings", "tags"
|
||||
add_foreign_key "tags", "users"
|
||||
|
|
|
|||
243
lib/tasks/points_raw_data.rake
Normal file
243
lib/tasks/points_raw_data.rake
Normal file
|
|
@ -0,0 +1,243 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
# Operator tasks for the points raw_data archive: restore, temporary restore,
# statistics, integrity verification and the initial bulk archival.
namespace :points do
  namespace :raw_data do
    desc 'Restore raw_data from archive to database for a specific month'
    task :restore, [:user_id, :year, :month] => :environment do |_t, args|
      validate_args!(args)

      user_id = args[:user_id].to_i
      year = args[:year].to_i
      month = args[:month].to_i

      puts '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
      puts ' Restoring raw_data to DATABASE'
      puts " User: #{user_id} | Month: #{year}-#{format('%02d', month)}"
      puts '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
      puts ''

      restorer = Points::RawData::Restorer.new
      restorer.restore_to_database(user_id, year, month)

      puts ''
      puts '✓ Restoration complete!'
      puts ''
      puts "Points in #{year}-#{month} now have raw_data in database."
      puts 'Run VACUUM ANALYZE points; to update statistics.'
    end

    desc 'Restore raw_data to memory/cache temporarily (for data migrations)'
    task :restore_temporary, [:user_id, :year, :month] => :environment do |_t, args|
      validate_args!(args)

      user_id = args[:user_id].to_i
      year = args[:year].to_i
      month = args[:month].to_i

      puts '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
      puts ' Loading raw_data into CACHE (temporary)'
      puts " User: #{user_id} | Month: #{year}-#{format('%02d', month)}"
      puts '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
      puts ''
      puts 'Data will be available for 1 hour via Point.raw_data_with_archive accessor'
      puts ''

      restorer = Points::RawData::Restorer.new
      restorer.restore_to_memory(user_id, year, month)

      puts ''
      puts '✓ Cache loaded successfully!'
      puts ''
      puts 'You can now run your data migration.'
      puts 'Example:'
      puts " rails runner \"Point.where(user_id: #{user_id}, timestamp_year: #{year}, timestamp_month: #{month}).find_each { |p| p.fix_coordinates_from_raw_data }\""
      puts ''
      puts 'Cache will expire in 1 hour automatically.'
    end

    desc 'Restore all archived raw_data for a user'
    task :restore_all, [:user_id] => :environment do |_t, args|
      raise 'Usage: rake points:raw_data:restore_all[user_id]' unless args[:user_id]

      user_id = args[:user_id].to_i
      user = User.find(user_id)

      puts '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
      puts ' Restoring ALL archives for user'
      puts " #{user.email} (ID: #{user_id})"
      puts '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
      puts ''

      archives = Points::RawDataArchive.where(user_id: user_id)
                                       .select(:year, :month)
                                       .distinct
                                       .order(:year, :month)

      # Count once up front instead of issuing a COUNT query per iteration.
      month_count = archives.count

      puts "Found #{month_count} months to restore"
      puts ''

      archives.each_with_index do |archive, idx|
        puts "[#{idx + 1}/#{month_count}] Restoring #{archive.year}-#{format('%02d', archive.month)}..."

        restorer = Points::RawData::Restorer.new
        restorer.restore_to_database(user_id, archive.year, archive.month)
      end

      puts ''
      puts "✓ All archives restored for user #{user_id}!"
    end

    desc 'Show archive statistics'
    task status: :environment do
      puts '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
      puts ' Points raw_data Archive Statistics'
      puts '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
      puts ''

      total_archives = Points::RawDataArchive.count
      total_points = Point.count
      archived_points = Point.where(raw_data_archived: true).count
      percentage = total_points.positive? ? (archived_points.to_f / total_points * 100).round(2) : 0

      puts "Archives: #{total_archives}"
      puts "Points archived: #{archived_points} / #{total_points} (#{percentage}%)"
      puts ''

      # Storage size via ActiveStorage
      total_blob_size = ActiveStorage::Blob
                        .joins('INNER JOIN active_storage_attachments ON active_storage_attachments.blob_id = active_storage_blobs.id')
                        .where("active_storage_attachments.record_type = 'Points::RawDataArchive'")
                        .sum(:byte_size)

      puts "Storage used: #{ActiveSupport::NumberHelper.number_to_human_size(total_blob_size)}"
      puts ''

      # Recent activity
      recent = Points::RawDataArchive.where('archived_at > ?', 7.days.ago).count
      puts "Archives created last 7 days: #{recent}"
      puts ''

      # Top users
      puts 'Top 10 users by archive count:'
      puts '─────────────────────────────────────────────────'

      Points::RawDataArchive.group(:user_id)
                            .select('user_id, COUNT(*) as archive_count, SUM(point_count) as total_points')
                            .order('archive_count DESC')
                            .limit(10)
                            .each_with_index do |stat, idx|
        user = User.find(stat.user_id)
        puts "#{idx + 1}. #{user.email.ljust(30)} #{stat.archive_count.to_s.rjust(3)} archives, #{stat.total_points.to_s.rjust(8)} points"
      end

      puts ''
    end

    desc 'Verify archive integrity for a month'
    task :verify, [:user_id, :year, :month] => :environment do |_t, args|
      validate_args!(args)

      user_id = args[:user_id].to_i
      year = args[:year].to_i
      month = args[:month].to_i

      puts '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
      puts ' Verifying Archives'
      puts " User: #{user_id} | Month: #{year}-#{format('%02d', month)}"
      puts '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
      puts ''

      archives = Points::RawDataArchive.for_month(user_id, year, month)

      if archives.empty?
        puts 'No archives found.'
        exit
      end

      all_ok = true

      archives.each do |archive|
        print "Chunk #{archive.chunk_number}: "

        # Check file attached
        unless archive.file.attached?
          puts '✗ ERROR - File not attached!'
          all_ok = false
          next
        end

        # Download and count JSONL lines; each line is one archived point.
        begin
          compressed = archive.file.blob.download
          io = StringIO.new(compressed)
          gz = Zlib::GzipReader.new(io)

          actual_count = 0
          gz.each_line { actual_count += 1 }
          gz.close

          if actual_count == archive.point_count
            puts "✓ OK (#{actual_count} points, #{archive.size_mb} MB)"
          else
            puts "✗ MISMATCH - Expected #{archive.point_count}, found #{actual_count}"
            all_ok = false
          end
        rescue StandardError => e
          puts "✗ ERROR - #{e.message}"
          all_ok = false
        end
      end

      puts ''
      if all_ok
        puts '✓ All archives verified successfully!'
      else
        puts '✗ Some archives have issues. Please investigate.'
      end
    end

    desc 'Run initial archival for old data (safe to re-run)'
    task initial_archive: :environment do
      puts '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
      puts ' Initial Archival (2+ months old data)'
      puts '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
      puts ''
      puts 'This will archive points.raw_data for months 2+ months old.'
      puts 'This is safe to run multiple times (idempotent).'
      puts ''
      print 'Continue? (y/N): '

      # gets returns nil at EOF (e.g. non-interactive run); treat that as "no".
      response = $stdin.gets.to_s.chomp.downcase
      unless response == 'y'
        puts 'Cancelled.'
        exit
      end

      puts ''
      stats = Points::RawData::Archiver.new.call

      puts ''
      puts '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
      puts ' Archival Complete'
      puts '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
      puts ''
      puts "Months processed: #{stats[:processed]}"
      puts "Points archived: #{stats[:archived]}"
      puts "Failures: #{stats[:failed]}"
      puts ''

      # `next`, not `return`: a task body is a block, and `return` inside it
      # raises LocalJumpError.
      next unless stats[:archived].positive?

      puts 'Next steps:'
      puts '1. Verify a sample: rake points:raw_data:verify[user_id,year,month]'
      puts '2. Check stats: rake points:raw_data:status'
      puts '3. (Optional) Reclaim space: VACUUM FULL points; (during maintenance)'
    end
  end
end
|
||||
|
||||
# Ensures the task received user_id, year and month arguments.
#
# @param args [#[]] task arguments keyed by :user_id, :year, :month
# @return [nil] when all three are present
# @raise [RuntimeError] with a usage message when any is missing
def validate_args!(args)
  all_present = %i[user_id year month].all? { |key| args[key] }

  raise 'Usage: rake points:raw_data:TASK[user_id,year,month]' unless all_present
end
|
||||
Loading…
Reference in a new issue