Add rake tasks for data cleanup and user activation

This commit is contained in:
Eugene Burmakin 2025-03-21 18:09:58 +01:00
parent b305d21bd3
commit dbd9b7f31f
14 changed files with 221 additions and 43 deletions

View file

@ -1 +1 @@
0.25.1 0.25.2

View file

@ -4,6 +4,23 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/) The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/). and this project adheres to [Semantic Versioning](http://semver.org/).
# 0.25.2 - 2025-03-21
## Fixed
- Migration to add unique index to points now contains code to remove duplicates from the database.
- Issue with ESRI maps not being displayed correctly. #956
## Added
- `rake data_cleanup:remove_duplicate_points` task added to remove duplicate points from the database and export them to a CSV file.
- `rake data_cleanup:update_points_to_use_lonlat` task added for convenient manual migration of points to the new `lonlat` column.
- `rake users:activate` task added to activate all users.
## Changed
- Merged visits now use the combined name of the merged visits.
# 0.25.1 - 2025-03-17 # 0.25.1 - 2025-03-17
## Fixed ## Fixed

File diff suppressed because one or more lines are too long

View file

@ -108,6 +108,10 @@ export function cyclOsmMapLayer(map, selectedLayerName) {
export function esriWorldStreetMapLayer(map, selectedLayerName) { export function esriWorldStreetMapLayer(map, selectedLayerName) {
let layerName = 'esriWorldStreet'; let layerName = 'esriWorldStreet';
let layer = L.tileLayer('https://server.arcgisonline.com/ArcGIS/rest/services/World_Street_Map/MapServer/tile/{z}/{y}/{x}', { let layer = L.tileLayer('https://server.arcgisonline.com/ArcGIS/rest/services/World_Street_Map/MapServer/tile/{z}/{y}/{x}', {
minZoom: 1,
maxZoom: 19,
bounds: [[-90, -180], [90, 180]],
noWrap: true,
attribution: 'Tiles © Esri — Source: Esri, DeLorme, NAVTEQ, USGS, Intermap, iPC, NRCAN, Esri Japan, METI, Esri China (Hong Kong), Esri (Thailand), TomTom, 2012' attribution: 'Tiles © Esri — Source: Esri, DeLorme, NAVTEQ, USGS, Intermap, iPC, NRCAN, Esri Japan, METI, Esri China (Hong Kong), Esri (Thailand), TomTom, 2012'
}); });
@ -121,6 +125,10 @@ export function esriWorldStreetMapLayer(map, selectedLayerName) {
export function esriWorldTopoMapLayer(map, selectedLayerName) { export function esriWorldTopoMapLayer(map, selectedLayerName) {
let layerName = 'esriWorldTopo'; let layerName = 'esriWorldTopo';
let layer = L.tileLayer('https://server.arcgisonline.com/ArcGIS/rest/services/World_Topo_Map/MapServer/tile/{z}/{y}/{x}', { let layer = L.tileLayer('https://server.arcgisonline.com/ArcGIS/rest/services/World_Topo_Map/MapServer/tile/{z}/{y}/{x}', {
minZoom: 1,
maxZoom: 19,
bounds: [[-90, -180], [90, 180]],
noWrap: true,
attribution: 'Tiles © Esri — Esri, DeLorme, NAVTEQ, TomTom, Intermap, iPC, USGS, FAO, NPS, NRCAN, GeoBase, Kadaster NL, Ordnance Survey, Esri Japan, METI, Esri China (Hong Kong), and the GIS User Community' attribution: 'Tiles © Esri — Esri, DeLorme, NAVTEQ, TomTom, Intermap, iPC, USGS, FAO, NPS, NRCAN, GeoBase, Kadaster NL, Ordnance Survey, Esri Japan, METI, Esri China (Hong Kong), and the GIS User Community'
}); });
@ -134,6 +142,10 @@ export function esriWorldTopoMapLayer(map, selectedLayerName) {
export function esriWorldImageryMapLayer(map, selectedLayerName) { export function esriWorldImageryMapLayer(map, selectedLayerName) {
let layerName = 'esriWorldImagery'; let layerName = 'esriWorldImagery';
let layer = L.tileLayer('https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}', { let layer = L.tileLayer('https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}', {
minZoom: 1,
maxZoom: 19,
bounds: [[-90, -180], [90, 180]],
noWrap: true,
attribution: 'Tiles © Esri — Source: Esri, i-cubed, USDA, USGS, AEX, GeoEye, Getmapping, Aerogrid, IGN, IGP, UPR-EGP, and the GIS User Community' attribution: 'Tiles © Esri — Source: Esri, i-cubed, USDA, USGS, AEX, GeoEye, Getmapping, Aerogrid, IGN, IGP, UPR-EGP, and the GIS User Community'
}); });
@ -147,8 +159,11 @@ export function esriWorldImageryMapLayer(map, selectedLayerName) {
export function esriWorldGrayCanvasMapLayer(map, selectedLayerName) { export function esriWorldGrayCanvasMapLayer(map, selectedLayerName) {
let layerName = 'esriWorldGrayCanvas'; let layerName = 'esriWorldGrayCanvas';
let layer = L.tileLayer('https://server.arcgisonline.com/ArcGIS/rest/services/Canvas/World_Light_Gray_Base/MapServer/tile/{z}/{y}/{x}', { let layer = L.tileLayer('https://server.arcgisonline.com/ArcGIS/rest/services/Canvas/World_Light_Gray_Base/MapServer/tile/{z}/{y}/{x}', {
attribution: 'Tiles © Esri — Esri, DeLorme, NAVTEQ', minZoom: 1,
maxZoom: 16 maxZoom: 16,
bounds: [[-90, -180], [90, 180]],
noWrap: true,
attribution: 'Tiles © Esri — Esri, DeLorme, NAVTEQ'
}); });
if (selectedLayerName === layerName) { if (selectedLayerName === layerName) {

View file

@ -26,19 +26,22 @@ export const mapsConfig = {
}, },
"esriWorldStreet": { "esriWorldStreet": {
url: "https://server.arcgisonline.com/ArcGIS/rest/services/World_Street_Map/MapServer/tile/{z}/{y}/{x}", url: "https://server.arcgisonline.com/ArcGIS/rest/services/World_Street_Map/MapServer/tile/{z}/{y}/{x}",
maxZoom: 19,
attribution: "Tiles © Esri — Source: Esri, DeLorme, NAVTEQ, USGS, Intermap, iPC, NRCAN, Esri Japan, METI, Esri China (Hong Kong), Esri (Thailand), TomTom, 2012" attribution: "Tiles © Esri — Source: Esri, DeLorme, NAVTEQ, USGS, Intermap, iPC, NRCAN, Esri Japan, METI, Esri China (Hong Kong), Esri (Thailand), TomTom, 2012"
}, },
"esriWorldTopo": { "esriWorldTopo": {
url: "https://server.arcgisonline.com/ArcGIS/rest/services/World_Topo_Map/MapServer/tile/{z}/{y}/{x}", url: "https://server.arcgisonline.com/ArcGIS/rest/services/World_Topo_Map/MapServer/tile/{z}/{y}/{x}",
maxZoom: 19,
attribution: "Tiles © Esri — Esri, DeLorme, NAVTEQ, TomTom, Intermap, iPC, USGS, FAO, NPS, NRCAN, GeoBase, Kadaster NL, Ordnance Survey, Esri Japan, METI, Esri China (Hong Kong), and the GIS User Community" attribution: "Tiles © Esri — Esri, DeLorme, NAVTEQ, TomTom, Intermap, iPC, USGS, FAO, NPS, NRCAN, GeoBase, Kadaster NL, Ordnance Survey, Esri Japan, METI, Esri China (Hong Kong), and the GIS User Community"
}, },
"esriWorldImagery": { "esriWorldImagery": {
url: "https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}", url: "https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}",
maxZoom: 19,
attribution: "Tiles © Esri — Source: Esri, i-cubed, USDA, USGS, AEX, GeoEye, Getmapping, Aerogrid, IGN, IGP, UPR-EGP, and the GIS User Community" attribution: "Tiles © Esri — Source: Esri, i-cubed, USDA, USGS, AEX, GeoEye, Getmapping, Aerogrid, IGN, IGP, UPR-EGP, and the GIS User Community"
}, },
"esriWorldGrayCanvas": { "esriWorldGrayCanvas": {
url: "https://server.arcgisonline.com/ArcGIS/rest/services/Canvas/World_Light_Gray_Base/MapServer/tile/{z}/{y}/{x}", url: "https://server.arcgisonline.com/ArcGIS/rest/services/Canvas/World_Light_Gray_Base/MapServer/tile/{z}/{y}/{x}",
attribution: "Tiles © Esri — Esri, DeLorme, NAVTEQ", maxZoom: 16,
maxZoom: 16 attribution: "Tiles © Esri — Esri, DeLorme, NAVTEQ"
} }
}; };

View file

@ -3,20 +3,11 @@
module PointValidation module PointValidation
extend ActiveSupport::Concern extend ActiveSupport::Concern
# Check if a point with the same coordinates, timestamp, and user_id already exists
def point_exists?(params, user_id) def point_exists?(params, user_id)
# Ensure the coordinates are valid
longitude = params[:longitude].to_f
latitude = params[:latitude].to_f
# Check if longitude and latitude are valid values
return false if longitude.zero? && latitude.zero?
return false if longitude.abs > 180 || latitude.abs > 90
# Use where with parameter binding and then exists?
Point.where( Point.where(
'ST_SetSRID(ST_MakePoint(?, ?), 4326) = lonlat AND timestamp = ? AND user_id = ?', lonlat: params[:lonlat],
longitude, latitude, params[:timestamp].to_i, user_id timestamp: params[:timestamp].to_i,
user_id:
).exists? ).exists?
end end
end end

View file

@ -45,7 +45,7 @@ module Visits
earliest_start = visits.min_by(&:started_at).started_at earliest_start = visits.min_by(&:started_at).started_at
latest_end = visits.max_by(&:ended_at).ended_at latest_end = visits.max_by(&:ended_at).ended_at
total_duration = ((latest_end - earliest_start) / 60).round total_duration = ((latest_end - earliest_start) / 60).round
combined_name = "Combined Visit (#{visits.map(&:name).join(', ')})" combined_name = visits.map(&:name).join(', ')
{ {
earliest_start:, earliest_start:,

View file

@ -1 +1 @@
DataMigrate::Data.define(version: 20250120154554) DataMigrate::Data.define(version: 20250303194123)

View file

@ -9,6 +9,19 @@ class AddUniqueIndexToPoints < ActiveRecord::Migration[8.0]
name: 'unique_points_lat_long_timestamp_user_id_index' name: 'unique_points_lat_long_timestamp_user_id_index'
) )
execute <<-SQL
DELETE FROM points
WHERE id IN (
SELECT id
FROM (
SELECT id,
ROW_NUMBER() OVER (PARTITION BY latitude, longitude, timestamp, user_id ORDER BY id) as row_num
FROM points
) AS duplicates
WHERE duplicates.row_num > 1
);
SQL
add_index :points, %i[latitude longitude timestamp user_id], add_index :points, %i[latitude longitude timestamp user_id],
unique: true, unique: true,
name: 'unique_points_lat_long_timestamp_user_id_index', name: 'unique_points_lat_long_timestamp_user_id_index',

1
db/schema.rb generated
View file

@ -126,7 +126,6 @@ ActiveRecord::Schema[8.0].define(version: 2025_03_03_194043) do
t.datetime "created_at", null: false t.datetime "created_at", null: false
t.datetime "updated_at", null: false t.datetime "updated_at", null: false
t.geography "lonlat", limit: {srid: 4326, type: "st_point", geographic: true} t.geography "lonlat", limit: {srid: 4326, type: "st_point", geographic: true}
t.index "name, st_astext(lonlat)", name: "index_places_on_name_and_lonlat", unique: true
t.index ["lonlat"], name: "index_places_on_lonlat", using: :gist t.index ["lonlat"], name: "index_places_on_lonlat", using: :gist
end end

119
lib/tasks/data_cleanup.rake Normal file
View file

@ -0,0 +1,119 @@
require 'csv'
namespace :data_cleanup do
  desc 'Remove duplicate points using raw SQL and export them to a file'
  # Finds points that duplicate an earlier point (lowest id wins) for the same
  # user and timestamp, either by the latitude/longitude columns or by the
  # lonlat column, writes the full rows to a timestamped CSV under tmp/, and
  # then deletes them from the points table.
  task remove_duplicate_points: :environment do
    timestamp = Time.current.strftime('%Y%m%d%H%M%S')
    export_path = Rails.root.join("tmp/duplicate_points_#{timestamp}.csv")
    connection = ActiveRecord::Base.connection
    batch_size = 1000

    begin
      puts 'Finding duplicates...'

      # Duplicates by latitude/longitude. Window partitions group NULL keys
      # together, so rows without these coordinates are excluded; otherwise
      # every such row sharing a timestamp and user would be flagged as a
      # duplicate regardless of where it actually is.
      connection.execute(<<~SQL)
        DROP TABLE IF EXISTS lat_long_duplicates;
        CREATE TEMPORARY TABLE lat_long_duplicates AS
        SELECT id
        FROM (
          SELECT id,
                 ROW_NUMBER() OVER (PARTITION BY latitude, longitude, timestamp, user_id ORDER BY id) as row_num
          FROM points
          WHERE latitude IS NOT NULL
            AND longitude IS NOT NULL
        ) AS dups
        WHERE dups.row_num > 1;
      SQL

      # Duplicates by lonlat. Rows whose lonlat has not been populated yet are
      # excluded for the same NULL-grouping reason as above.
      connection.execute(<<~SQL)
        DROP TABLE IF EXISTS lonlat_duplicates;
        CREATE TEMPORARY TABLE lonlat_duplicates AS
        SELECT id
        FROM (
          SELECT id,
                 ROW_NUMBER() OVER (PARTITION BY lonlat, timestamp, user_id ORDER BY id) as row_num
          FROM points
          WHERE lonlat IS NOT NULL
        ) AS dups
        WHERE dups.row_num > 1;
      SQL

      # UNION (not UNION ALL) so an id flagged by both checks appears once.
      connection.execute(<<~SQL)
        DROP TABLE IF EXISTS duplicate_points;
        CREATE TEMPORARY TABLE duplicate_points AS
        SELECT id FROM lat_long_duplicates
        UNION
        SELECT id FROM lonlat_duplicates;
      SQL

      duplicate_count = connection.select_value('SELECT COUNT(*) FROM duplicate_points').to_i
      puts "Found #{duplicate_count} duplicate points"

      if duplicate_count.zero?
        puts 'No duplicate points to remove'
        next
      end

      puts "Exporting duplicates to #{export_path}..."

      # Ask the adapter for the column list so the lookup follows the same
      # schema resolution as the queries below, and quote each name before
      # interpolating it into SQL.
      columns = connection.columns('points').map(&:name)
      select_list = columns.map { |column| connection.quote_column_name(column) }.join(', ')

      CSV.open(export_path, 'wb') do |csv|
        csv << columns

        # Keyset pagination on id: each batch resumes after the last exported
        # id instead of re-scanning and discarding OFFSET rows.
        last_id = nil
        exported = 0

        loop do
          resume_clause = last_id ? "AND id > #{last_id}" : ''

          records = connection.select_all(<<~SQL)
            SELECT #{select_list}
            FROM points
            WHERE id IN (SELECT id FROM duplicate_points)
            #{resume_clause}
            ORDER BY id
            LIMIT #{batch_size};
          SQL
          break if records.empty?

          records.each do |record|
            csv << columns.map { |column| record[column] }
          end

          last_id = records.to_a.last['id'].to_i
          exported += batch_size
          # One progress dot per ten batches.
          print '.' if (exported % 10_000).zero?
        end
      end

      puts "\nSuccessfully exported #{duplicate_count} duplicate points to #{export_path}"

      # Delete only after the export above has completed.
      delete_result = connection.execute(<<~SQL)
        DELETE FROM points
        WHERE id IN (SELECT id FROM duplicate_points);
      SQL

      puts "Successfully deleted #{delete_result.cmd_tuples} duplicate points"
    ensure
      # Drop the working tables on every exit path (no duplicates, success, or
      # an error part-way through), not just after a successful delete.
      %w[lat_long_duplicates lonlat_duplicates duplicate_points].each do |table|
        connection.execute("DROP TABLE IF EXISTS #{table};")
      end
    end
  end

  desc 'Update points to use lonlat field from latitude and longitude'
  # Backfills lonlat for rows where it is still NULL and both latitude and
  # longitude are present; rows that already have lonlat are left untouched.
  task update_points_to_use_lonlat: :environment do
    puts 'Updating points to use lonlat...'

    # Use PostGIS functions to properly create geography type
    result = ActiveRecord::Base.connection.execute(<<~SQL)
      UPDATE points
      SET lonlat = ST_SetSRID(ST_MakePoint(longitude, latitude), 4326)::geography
      WHERE lonlat IS NULL
        AND longitude IS NOT NULL
        AND latitude IS NOT NULL;
    SQL

    puts "Successfully updated #{result.cmd_tuples} points with lonlat values"
  end
end

18
lib/tasks/users.rake Normal file
View file

@ -0,0 +1,18 @@
# frozen_string_literal: true
namespace :users do
  desc 'Activate all users'
  # Sets every user's status to active in a single UPDATE. Refuses to run
  # (exit status 1) unless DawarichSettings reports a self-hosted instance.
  task activate: :environment do
    if DawarichSettings.self_hosted?
      puts 'Activating all users...'

      # Bulk update on purpose: skips validations and callbacks.
      User.update_all(status: :active) # rubocop:disable Rails/SkipsModelValidations

      puts 'All users have been activated'
    else
      puts 'This task is only available for self-hosted users'
      exit 1
    end
  end
end

View file

@ -16,23 +16,23 @@ RSpec.describe PointValidation do
describe '#point_exists?' do describe '#point_exists?' do
context 'with invalid coordinates' do context 'with invalid coordinates' do
it 'returns false for zero coordinates' do it 'returns false for zero coordinates' do
params = { longitude: '0', latitude: '0', timestamp: Time.now.to_i } params = { lonlat: 'POINT(0 0)', timestamp: Time.now.to_i }
expect(validator.point_exists?(params, user.id)).to be false expect(validator.point_exists?(params, user.id)).to be false
end end
it 'returns false for longitude outside valid range' do it 'returns false for longitude outside valid range' do
params = { longitude: '181', latitude: '45', timestamp: Time.now.to_i } params = { lonlat: 'POINT(181 45)', timestamp: Time.now.to_i }
expect(validator.point_exists?(params, user.id)).to be false expect(validator.point_exists?(params, user.id)).to be false
params = { longitude: '-181', latitude: '45', timestamp: Time.now.to_i } params = { lonlat: 'POINT(-181 45)', timestamp: Time.now.to_i }
expect(validator.point_exists?(params, user.id)).to be false expect(validator.point_exists?(params, user.id)).to be false
end end
it 'returns false for latitude outside valid range' do it 'returns false for latitude outside valid range' do
params = { longitude: '45', latitude: '91', timestamp: Time.now.to_i } params = { lonlat: 'POINT(45 91)', timestamp: Time.now.to_i }
expect(validator.point_exists?(params, user.id)).to be false expect(validator.point_exists?(params, user.id)).to be false
params = { longitude: '45', latitude: '-91', timestamp: Time.now.to_i } params = { lonlat: 'POINT(45 -91)', timestamp: Time.now.to_i }
expect(validator.point_exists?(params, user.id)).to be false expect(validator.point_exists?(params, user.id)).to be false
end end
end end
@ -41,7 +41,7 @@ RSpec.describe PointValidation do
let(:longitude) { 10.0 } let(:longitude) { 10.0 }
let(:latitude) { 50.0 } let(:latitude) { 50.0 }
let(:timestamp) { Time.now.to_i } let(:timestamp) { Time.now.to_i }
let(:params) { { longitude: longitude.to_s, latitude: latitude.to_s, timestamp: timestamp } } let(:params) { { lonlat: "POINT(#{longitude} #{latitude})", timestamp: timestamp } }
context 'when point does not exist' do context 'when point does not exist' do
before do before do
@ -54,8 +54,9 @@ RSpec.describe PointValidation do
it 'queries the database with correct parameters' do it 'queries the database with correct parameters' do
expect(Point).to receive(:where).with( expect(Point).to receive(:where).with(
'ST_SetSRID(ST_MakePoint(?, ?), 4326) = lonlat AND timestamp = ? AND user_id = ?', lonlat: "POINT(#{longitude} #{latitude})",
longitude, latitude, timestamp, user.id timestamp: timestamp,
user_id: user.id
).and_return(double(exists?: false)) ).and_return(double(exists?: false))
validator.point_exists?(params, user.id) validator.point_exists?(params, user.id)
@ -75,11 +76,12 @@ RSpec.describe PointValidation do
context 'with string parameters' do context 'with string parameters' do
it 'converts string coordinates to float values' do it 'converts string coordinates to float values' do
params = { longitude: '10.5', latitude: '50.5', timestamp: '1650000000' } params = { lonlat: 'POINT(10.5 50.5)', timestamp: '1650000000' }
expect(Point).to receive(:where).with( expect(Point).to receive(:where).with(
'ST_SetSRID(ST_MakePoint(?, ?), 4326) = lonlat AND timestamp = ? AND user_id = ?', lonlat: 'POINT(10.5 50.5)',
10.5, 50.5, 1_650_000_000, user.id timestamp: 1_650_000_000,
user_id: user.id
).and_return(double(exists?: false)) ).and_return(double(exists?: false))
validator.point_exists?(params, user.id) validator.point_exists?(params, user.id)
@ -88,14 +90,14 @@ RSpec.describe PointValidation do
context 'with different boundary values' do context 'with different boundary values' do
it 'accepts maximum valid coordinate values' do it 'accepts maximum valid coordinate values' do
params = { longitude: '180', latitude: '90', timestamp: Time.now.to_i } params = { lonlat: 'POINT(180 90)', timestamp: Time.now.to_i }
expect(Point).to receive(:where).and_return(double(exists?: false)) expect(Point).to receive(:where).and_return(double(exists?: false))
expect(validator.point_exists?(params, user.id)).to be false expect(validator.point_exists?(params, user.id)).to be false
end end
it 'accepts minimum valid coordinate values' do it 'accepts minimum valid coordinate values' do
params = { longitude: '-180', latitude: '-90', timestamp: Time.now.to_i } params = { lonlat: 'POINT(-180 -90)', timestamp: Time.now.to_i }
expect(Point).to receive(:where).and_return(double(exists?: false)) expect(Point).to receive(:where).and_return(double(exists?: false))
expect(validator.point_exists?(params, user.id)).to be false expect(validator.point_exists?(params, user.id)).to be false
@ -109,8 +111,7 @@ RSpec.describe PointValidation do
let(:existing_timestamp) { 1_650_000_000 } let(:existing_timestamp) { 1_650_000_000 }
let(:existing_point_params) do let(:existing_point_params) do
{ {
longitude: 10.5, lonlat: 'POINT(10.5 50.5)',
latitude: 50.5,
timestamp: existing_timestamp, timestamp: existing_timestamp,
user_id: user.id user_id: user.id
} }
@ -130,8 +131,7 @@ RSpec.describe PointValidation do
it 'returns true when a point with same coordinates and timestamp exists' do it 'returns true when a point with same coordinates and timestamp exists' do
params = { params = {
longitude: existing_point_params[:longitude].to_s, lonlat: 'POINT(10.5 50.5)',
latitude: existing_point_params[:latitude].to_s,
timestamp: existing_timestamp timestamp: existing_timestamp
} }
@ -140,8 +140,7 @@ RSpec.describe PointValidation do
it 'returns false when a point with different coordinates exists' do it 'returns false when a point with different coordinates exists' do
params = { params = {
longitude: (existing_point_params[:longitude] + 0.1).to_s, lonlat: 'POINT(10.6 50.5)',
latitude: existing_point_params[:latitude].to_s,
timestamp: existing_timestamp timestamp: existing_timestamp
} }
@ -150,8 +149,7 @@ RSpec.describe PointValidation do
it 'returns false when a point with different timestamp exists' do it 'returns false when a point with different timestamp exists' do
params = { params = {
longitude: existing_point_params[:longitude].to_s, lonlat: 'POINT(10.5 50.5)',
latitude: existing_point_params[:latitude].to_s,
timestamp: existing_timestamp + 1 timestamp: existing_timestamp + 1
} }

View file

@ -58,7 +58,7 @@ RSpec.describe Visits::MergeService do
service = described_class.new([visit1, visit2]) service = described_class.new([visit1, visit2])
result = service.call result = service.call
expected_name = "Combined Visit (#{visit1_name}, #{visit2_name})" expected_name = "#{visit1_name}, #{visit2_name}"
expect(result.name).to eq(expected_name) expect(result.name).to eq(expected_name)
end end