Ingest trend data from FASP

This commit is contained in:
David Roetzel 2025-02-27 12:11:47 +01:00
parent f50bd3d6d9
commit aeefb4f6d1
No known key found for this signature in database
23 changed files with 523 additions and 97 deletions

View File

@ -0,0 +1,19 @@
# frozen_string_literal: true
# == Schema Information
#
# Table name: fasp_preview_card_trends
#
# id :bigint(8) not null, primary key
# allowed :boolean default(FALSE), not null
# language :string not null
# rank :integer not null
# created_at :datetime not null
# updated_at :datetime not null
# fasp_provider_id :bigint(8) not null
# preview_card_id :bigint(8) not null
#
class Fasp::PreviewCardTrend < ApplicationRecord
# One row of a trending-links ranking ingested from a FASP provider: it ties
# a preview card to the provider that reported it. Rank, language and the
# `allowed` flag live in the columns described in the schema comment above.
belongs_to :preview_card
# The association name does not match the namespaced model, hence the
# explicit class_name.
belongs_to :fasp_provider, class_name: 'Fasp::Provider'
end

View File

@ -29,6 +29,10 @@ class Fasp::Provider < ApplicationRecord
before_create :create_keypair
after_commit :update_remote_capabilities
# Providers whose `capabilities` JSON contains an entry with the given id
# that is marked as enabled.
#
# The containment operand is serialized with `to_json` instead of being built
# by string interpolation, so quotes or backslashes in `capability_name`
# cannot produce malformed (or differently shaped) JSON.
scope :with_capability, lambda { |capability_name|
  where('fasp_providers.capabilities @> ?::jsonb', [{ id: capability_name, enabled: true }].to_json)
}
def enabled_capabilities=(hash)
capabilities.each do |capability|
capability['enabled'] = hash[capability['id']] == '1'

View File

@ -0,0 +1,19 @@
# frozen_string_literal: true
# == Schema Information
#
# Table name: fasp_status_trends
#
# id :bigint(8) not null, primary key
# allowed :boolean default(FALSE), not null
# language :string not null
# rank :integer not null
# created_at :datetime not null
# updated_at :datetime not null
# fasp_provider_id :bigint(8) not null
# status_id :bigint(8) not null
#
class Fasp::StatusTrend < ApplicationRecord
# One row of a trending-posts ranking ingested from a FASP provider: it ties
# a status to the provider that reported it. Rank, language and the
# `allowed` flag live in the columns described in the schema comment above.
belongs_to :status
# The association name does not match the namespaced model, hence the
# explicit class_name.
belongs_to :fasp_provider, class_name: 'Fasp::Provider'
end

View File

@ -0,0 +1,19 @@
# frozen_string_literal: true
# == Schema Information
#
# Table name: fasp_tag_trends
#
# id :bigint(8) not null, primary key
# allowed :boolean default(FALSE), not null
# language :string not null
# rank :integer not null
# created_at :datetime not null
# updated_at :datetime not null
# fasp_provider_id :bigint(8) not null
# tag_id :bigint(8) not null
#
class Fasp::TagTrend < ApplicationRecord
# One row of a trending-hashtags ranking ingested from a FASP provider: it
# ties a tag to the provider that reported it. Rank, language and the
# `allowed` flag live in the columns described in the schema comment above.
belongs_to :tag
# The association name does not match the namespaced model, hence the
# explicit class_name.
belongs_to :fasp_provider, class_name: 'Fasp::Provider'
end

View File

@ -0,0 +1,43 @@
# frozen_string_literal: true
# Replaces the stored trending links of one FASP provider, for one language,
# with the ranking the provider currently reports.
class Fasp::RefreshPreviewCardTrendsService
  # @param provider [Fasp::Provider] provider to query
  # @param language [String] language the ranking is requested for
  def call(provider, language)
    results = query_trends(provider, language)

    # Resolve the preview cards before opening the transaction so that it is
    # not held open while remote pages are being fetched.
    trends = (results['links'] || []).filter_map do |link|
      preview_card = fetch_preview_card(link['url'])
      # The fetch service may return a card it did not manage to save; only
      # persisted cards can be referenced by a trend row.
      next unless preview_card&.persisted?

      fetch_examples(link['examples'])

      {
        fasp_provider: provider,
        preview_card:,
        language:,
        rank: link['rank'],
        # Coerce nil to false. The previous `!trendable?.nil?` also turned an
        # explicit `false` into `true`.
        allowed: preview_card.trendable? || false,
      }
    end

    Fasp::PreviewCardTrend.transaction do
      # Only this provider's rows are replaced; rows ingested from other
      # providers for the same language are left alone.
      Fasp::PreviewCardTrend.where(fasp_provider: provider, language:).delete_all
      trends.each { |attributes| Fasp::PreviewCardTrend.create!(attributes) }
    end
  end

  private

  # Returns the preview card for the URL, or nil if it could not be fetched.
  def fetch_preview_card(url)
    FetchLinkCardForURLService.new.call(url)
  end

  # Enqueues a fetch for every example URI; tolerates a missing list.
  def fetch_examples(uris)
    Array(uris).each { |uri| FetchReplyWorker.perform_async(uri) }
  end

  # Asks the provider for at most 20 links trending within the last 4 hours.
  def query_trends(provider, language)
    params = { language:, withinLastHours: 4, maxCount: 20 }
    Fasp::Request.new(provider).get("/trends/v0/links?#{params.to_query}")
  end
end

View File

@ -0,0 +1,37 @@
# frozen_string_literal: true
# Replaces the stored trending posts of one FASP provider, for one language,
# with the ranking the provider currently reports.
class Fasp::RefreshStatusTrendsService
  # @param provider [Fasp::Provider] provider to query
  # @param language [String] language the ranking is requested for
  def call(provider, language)
    results = query_trends(provider, language)

    # Resolve the statuses before opening the transaction so that it is not
    # held open while remote URIs are being resolved.
    trends = (results['content'] || []).filter_map do |result|
      status = fetch_status(result['uri'])
      # Skips unresolved URIs (nil). NOTE(review): ResolveURLService looks like
      # a generic resolver that may also return non-status records; anything
      # that is not a Status cannot be stored in this association. Confirm.
      next unless status.is_a?(Status)

      {
        fasp_provider: provider,
        status:,
        language:,
        rank: result['rank'],
        # Coerce nil to false. The previous `!trendable?.nil?` also turned an
        # explicit `false` into `true`.
        allowed: status.trendable? || false,
      }
    end

    Fasp::StatusTrend.transaction do
      # Only this provider's rows are replaced; rows ingested from other
      # providers for the same language are left alone.
      Fasp::StatusTrend.where(fasp_provider: provider, language:).delete_all
      trends.each { |attributes| Fasp::StatusTrend.create!(attributes) }
    end
  end

  private

  # Resolves the URI to a local record via ResolveURLService.
  def fetch_status(uri)
    ResolveURLService.new.call(uri)
  end

  # Asks the provider for at most 20 posts trending within the last 4 hours.
  def query_trends(provider, language)
    params = { language:, withinLastHours: 4, maxCount: 20 }
    Fasp::Request.new(provider).get("/trends/v0/content?#{params.to_query}")
  end
end

View File

@ -0,0 +1,37 @@
# frozen_string_literal: true
# Replaces the stored trending hashtags of one FASP provider, for one
# language, with the ranking the provider currently reports.
class Fasp::RefreshTagTrendsService
  # @param provider [Fasp::Provider] provider to query
  # @param language [String] language the ranking is requested for
  def call(provider, language)
    results = query_trends(provider, language)

    Fasp::TagTrend.transaction do
      # Only this provider's rows are replaced; rows ingested from other
      # providers for the same language are left alone.
      Fasp::TagTrend.where(fasp_provider: provider, language:).delete_all

      (results['hashtags'] || []).each do |result|
        tag = Tag.find_or_create_by_names(result['name']).first

        Fasp::TagTrend.create!(
          fasp_provider: provider,
          tag:,
          language:,
          rank: result['rank'],
          allowed: tag.trendable?
        )

        fetch_examples(result['examples'])
      end
    end
  end

  private

  # Enqueues a fetch for every example URI; tolerates a missing list.
  def fetch_examples(uris)
    Array(uris).each { |uri| FetchReplyWorker.perform_async(uri) }
  end

  # Asks the provider for at most 20 hashtags trending within the last 4 hours.
  def query_trends(provider, language)
    params = { language:, withinLastHours: 4, maxCount: 20 }
    Fasp::Request.new(provider).get("/trends/v0/hashtags?#{params.to_query}")
  end
end

View File

@ -0,0 +1,118 @@
# frozen_string_literal: true
# Fetches (or refreshes) the PreviewCard for a single URL.
#
# The card is looked up by URL and re-fetched when it does not exist yet, was
# last updated two weeks ago or earlier, or is missing its image. Metadata is
# taken from oEmbed when that succeeds and extracted from the page HTML
# otherwise.
#
# The downloaded HTML is memoized on the instance, so use a fresh instance
# for every URL.
class FetchLinkCardForURLService < BaseService
  include Redisable
  include Lockable

  # @param url [String] the URL to build a preview card for
  # @return [PreviewCard, nil] the card (possibly unsaved when no usable
  #   metadata was found), or nil when the URL is blank, is rejected by
  #   `bad_url?`, or one of the rescued errors occurred
  def call(url)
    # Addressable::URI.parse(nil) returns nil, so calling `normalize` on the
    # result would raise a NoMethodError that is not rescued below.
    return if url.blank?

    @original_url = Addressable::URI.parse(url).normalize

    return if bad_url?(@original_url)

    @url = @original_url.to_s

    with_redis_lock("fetch:#{@original_url}") do
      @card = PreviewCard.find_by(url: @url)
      process_url if @card.nil? || @card.updated_at <= 2.weeks.ago || @card.missing_image?
    end

    @card
  rescue *Mastodon::HTTP_CONNECTION_ERRORS, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError, Encoding::UndefinedConversionError, ActiveRecord::RecordInvalid => e
    # @url is still unset when parsing itself failed, so fall back to the
    # raw argument to keep the log line meaningful.
    Rails.logger.debug { "Error fetching link #{@url || url}: #{e}" }
    nil
  end

  private

  # Populates the card, preferring oEmbed and falling back to the page HTML.
  def process_url
    @card ||= PreviewCard.new(url: @url)

    attempt_oembed || attempt_opengraph
  end

  # Downloads the page body once and memoizes it (nil unless the response is
  # a 200 with a text/html MIME type).
  def html
    return @html if defined?(@html)

    headers = {
      'Accept' => 'text/html',
      'Accept-Language' => "#{I18n.default_locale}, *;q=0.5",
      'User-Agent' => "#{Mastodon::Version.user_agent} Bot",
    }

    @html = Request.new(:get, @url).add_headers(headers).perform do |res|
      next unless res.code == 200 && res.mime_type == 'text/html'

      # We follow redirects, and ideally we want to save the preview card for
      # the destination URL and not any link shortener in-between, so here
      # we set the URL to the one of the last response in the redirect chain
      @url = res.request.uri.to_s
      @card = PreviewCard.find_or_initialize_by(url: @url) if @card.url != @url

      @html_charset = res.charset

      res.truncated_body
    end
  end

  def bad_url?(uri)
    # Avoid local instance URLs and invalid URLs
    uri.host.blank? || TagManager.instance.local_url?(uri.to_s) || !%w(http https).include?(uri.scheme)
  end

  # Fills the card from an oEmbed response.
  #
  # @return [false] when no oEmbed data is available or the type is unusable;
  #   otherwise the result of saving the card
  def attempt_oembed
    service         = FetchOEmbedService.new
    url_domain      = Addressable::URI.parse(@url).normalized_host
    cached_endpoint = Rails.cache.read("oembed_endpoint:#{url_domain}")

    # Try the cached endpoint first; only download the page to discover one
    # when that yields nothing.
    embed   = service.call(@url, cached_endpoint: cached_endpoint) unless cached_endpoint.nil?
    embed ||= service.call(@url, html: html) unless html.nil?

    return false if embed.nil?

    # Relative URLs in the response are resolved against the endpoint URL.
    url = Addressable::URI.parse(service.endpoint_url)

    @card.type          = embed[:type]
    @card.title         = embed[:title]         || ''
    @card.author_name   = embed[:author_name]   || ''
    @card.author_url    = embed[:author_url].present? ? (url + embed[:author_url]).to_s : ''
    @card.provider_name = embed[:provider_name] || ''
    @card.provider_url  = embed[:provider_url].present? ? (url + embed[:provider_url]).to_s : ''
    @card.width         = 0
    @card.height        = 0

    case @card.type
    when 'link'
      @card.image_remote_url = (url + embed[:thumbnail_url]).to_s if embed[:thumbnail_url].present?
    when 'photo'
      return false if embed[:url].blank?

      @card.embed_url        = (url + embed[:url]).to_s
      @card.image_remote_url = (url + embed[:url]).to_s
      @card.width            = embed[:width].presence  || 0
      @card.height           = embed[:height].presence || 0
    when 'video'
      @card.width            = embed[:width].presence  || 0
      @card.height           = embed[:height].presence || 0
      @card.html             = Sanitize.fragment(embed[:html], Sanitize::Config::MASTODON_OEMBED)
      @card.image_remote_url = (url + embed[:thumbnail_url]).to_s if embed[:thumbnail_url].present?
    when 'rich'
      # Most providers rely on <script> tags, which is a no-no
      return false
    end

    @card.save_with_optional_image!
  end

  # Fills the card from metadata extracted out of the page HTML. The card is
  # only saved when it ends up with a title or embeddable HTML.
  def attempt_opengraph
    return if html.nil?

    link_details_extractor = LinkDetailsExtractor.new(@url, @html, @html_charset)
    domain = Addressable::URI.parse(link_details_extractor.canonical_url).normalized_host
    provider = PreviewCardProvider.matching_domain(domain)
    linked_account = ResolveAccountService.new.call(link_details_extractor.author_account, suppress_errors: true) if link_details_extractor.author_account.present?

    # Store the card under the page's canonical URL when it differs.
    @card = PreviewCard.find_or_initialize_by(url: link_details_extractor.canonical_url) if link_details_extractor.canonical_url != @card.url
    @card.assign_attributes(link_details_extractor.to_preview_card_attributes)
    @card.author_account = linked_account if linked_account&.can_be_attributed_from?(domain) || provider&.trendable?
    @card.save_with_optional_image! unless @card.title.blank? && @card.html.blank?
  end
end

View File

@ -16,56 +16,25 @@ class FetchLinkCardService < BaseService
}iox
def call(status)
@status = status
@status = status
return if @status.with_preview_card?
@original_url = parse_urls
return if @original_url.nil? || @status.with_preview_card?
return if @original_url.nil?
@url = @original_url.to_s
with_redis_lock("fetch:#{@original_url}") do
@card = PreviewCard.find_by(url: @url)
process_url if @card.nil? || @card.updated_at <= 2.weeks.ago || @card.missing_image?
end
@card = FetchLinkCardForURLService.new.call(@url)
attach_card if @card&.persisted?
rescue *Mastodon::HTTP_CONNECTION_ERRORS, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError, Encoding::UndefinedConversionError, ActiveRecord::RecordInvalid => e
Rails.logger.debug { "Error fetching link #{@original_url}: #{e}" }
rescue ActiveRecord::RecordInvalid => e
Rails.logger.debug { "Error attching preview card for #{@original_url}: #{e}" }
nil
end
private
def process_url
@card ||= PreviewCard.new(url: @url)
attempt_oembed || attempt_opengraph
end
def html
return @html if defined?(@html)
headers = {
'Accept' => 'text/html',
'Accept-Language' => "#{I18n.default_locale}, *;q=0.5",
'User-Agent' => "#{Mastodon::Version.user_agent} Bot",
}
@html = Request.new(:get, @url).add_headers(headers).perform do |res|
next unless res.code == 200 && res.mime_type == 'text/html'
# We follow redirects, and ideally we want to save the preview card for
# the destination URL and not any link shortener in-between, so here
# we set the URL to the one of the last response in the redirect chain
@url = res.request.uri.to_s
@card = PreviewCard.find_or_initialize_by(url: @url) if @card.url != @url
@html_charset = res.charset
res.truncated_body
end
end
def attach_card
with_redis_lock("attach_card:#{@status.id}") do
return if @status.with_preview_card?
@ -104,62 +73,4 @@ class FetchLinkCardService < BaseService
# Avoid links for hashtags and mentions (microformats)
anchor['rel']&.include?('tag') || anchor['class']&.match?(/u-url|h-card/) || mention_link?(anchor)
end
def attempt_oembed
service = FetchOEmbedService.new
url_domain = Addressable::URI.parse(@url).normalized_host
cached_endpoint = Rails.cache.read("oembed_endpoint:#{url_domain}")
embed = service.call(@url, cached_endpoint: cached_endpoint) unless cached_endpoint.nil?
embed ||= service.call(@url, html: html) unless html.nil?
return false if embed.nil?
url = Addressable::URI.parse(service.endpoint_url)
@card.type = embed[:type]
@card.title = embed[:title] || ''
@card.author_name = embed[:author_name] || ''
@card.author_url = embed[:author_url].present? ? (url + embed[:author_url]).to_s : ''
@card.provider_name = embed[:provider_name] || ''
@card.provider_url = embed[:provider_url].present? ? (url + embed[:provider_url]).to_s : ''
@card.width = 0
@card.height = 0
case @card.type
when 'link'
@card.image_remote_url = (url + embed[:thumbnail_url]).to_s if embed[:thumbnail_url].present?
when 'photo'
return false if embed[:url].blank?
@card.embed_url = (url + embed[:url]).to_s
@card.image_remote_url = (url + embed[:url]).to_s
@card.width = embed[:width].presence || 0
@card.height = embed[:height].presence || 0
when 'video'
@card.width = embed[:width].presence || 0
@card.height = embed[:height].presence || 0
@card.html = Sanitize.fragment(embed[:html], Sanitize::Config::MASTODON_OEMBED)
@card.image_remote_url = (url + embed[:thumbnail_url]).to_s if embed[:thumbnail_url].present?
when 'rich'
# Most providers rely on <script> tags, which is a no-no
return false
end
@card.save_with_optional_image!
end
def attempt_opengraph
return if html.nil?
link_details_extractor = LinkDetailsExtractor.new(@url, @html, @html_charset)
domain = Addressable::URI.parse(link_details_extractor.canonical_url).normalized_host
provider = PreviewCardProvider.matching_domain(domain)
linked_account = ResolveAccountService.new.call(link_details_extractor.author_account, suppress_errors: true) if link_details_extractor.author_account.present?
@card = PreviewCard.find_or_initialize_by(url: link_details_extractor.canonical_url) if link_details_extractor.canonical_url != @card.url
@card.assign_attributes(link_details_extractor.to_preview_card_attributes)
@card.author_account = linked_account if linked_account&.can_be_attributed_from?(domain) || provider&.trendable?
@card.save_with_optional_image! unless @card.title.blank? && @card.html.blank?
end
end

View File

@ -0,0 +1,24 @@
# frozen_string_literal: true
# Sidekiq job that refreshes trending links from every FASP provider with the
# enabled `trends` capability, once per language chosen by recently signed-in
# users. Does nothing when the FASP feature is disabled or no such provider
# exists.
class Scheduler::Fasp::RefreshPreviewCardTrendsScheduler
  include Sidekiq::Worker

  sidekiq_options retry: 0, lock: :until_executed, lock_ttl: 30.minutes.to_i

  def perform
    return unless Mastodon::Feature.fasp_enabled?

    providers = Fasp::Provider.with_capability('trends')
    return unless providers.any?

    languages = recently_chosen_languages
    refresher = Fasp::RefreshPreviewCardTrendsService.new

    languages.each do |language|
      providers.each { |provider| refresher.call(provider, language) }
    end
  end

  private

  # Distinct entries of `chosen_languages` across recently signed-in users.
  def recently_chosen_languages
    User.signed_in_recently.pluck(Arel.sql('DISTINCT(unnest(chosen_languages))'))
  end
end

View File

@ -0,0 +1,24 @@
# frozen_string_literal: true
# Sidekiq job that refreshes trending posts from every FASP provider with the
# enabled `trends` capability, once per language chosen by recently signed-in
# users. Does nothing when the FASP feature is disabled or no such provider
# exists.
class Scheduler::Fasp::RefreshStatusTrendsScheduler
  include Sidekiq::Worker

  sidekiq_options retry: 0, lock: :until_executed, lock_ttl: 30.minutes.to_i

  def perform
    return unless Mastodon::Feature.fasp_enabled?

    providers = Fasp::Provider.with_capability('trends')
    return unless providers.any?

    languages = recently_chosen_languages
    refresher = Fasp::RefreshStatusTrendsService.new

    languages.each do |language|
      providers.each { |provider| refresher.call(provider, language) }
    end
  end

  private

  # Distinct entries of `chosen_languages` across recently signed-in users.
  def recently_chosen_languages
    User.signed_in_recently.pluck(Arel.sql('DISTINCT(unnest(chosen_languages))'))
  end
end

View File

@ -0,0 +1,24 @@
# frozen_string_literal: true
# Sidekiq job that refreshes trending hashtags from every FASP provider with
# the enabled `trends` capability, once per language chosen by recently
# signed-in users. Does nothing when the FASP feature is disabled or no such
# provider exists.
class Scheduler::Fasp::RefreshTagTrendsScheduler
  include Sidekiq::Worker

  sidekiq_options retry: 0, lock: :until_executed, lock_ttl: 30.minutes.to_i

  def perform
    return unless Mastodon::Feature.fasp_enabled?

    providers = Fasp::Provider.with_capability('trends')
    return unless providers.any?

    languages = recently_chosen_languages
    refresher = Fasp::RefreshTagTrendsService.new

    languages.each do |language|
      providers.each { |provider| refresher.call(provider, language) }
    end
  end

  private

  # Distinct entries of `chosen_languages` across recently signed-in users.
  def recently_chosen_languages
    User.signed_in_recently.pluck(Arel.sql('DISTINCT(unnest(chosen_languages))'))
  end
end

View File

@ -68,3 +68,15 @@
interval: 1 hour
class: Scheduler::AutoCloseRegistrationsScheduler
queue: scheduler
refresh_tag_trends_from_fasp:
interval: 1 hour
class: Scheduler::Fasp::RefreshTagTrendsScheduler
queue: scheduler
refresh_preview_card_trends_from_fasp:
interval: 1 hour
class: Scheduler::Fasp::RefreshPreviewCardTrendsScheduler
queue: scheduler
refresh_status_trends_from_fasp:
interval: 1 hour
class: Scheduler::Fasp::RefreshStatusTrendsScheduler
queue: scheduler

View File

@ -0,0 +1,15 @@
# frozen_string_literal: true
# Creates the table that stores hashtag trend rankings ingested from FASP
# providers.
class CreateFaspTagTrends < ActiveRecord::Migration[8.0]
def change
create_table :fasp_tag_trends do |t|
# NOTE(review): both foreign keys are created without an `on_delete`
# action, so deleting a referenced tag or provider is presumably rejected
# by the database while trend rows still point at it -- confirm this is
# intended.
t.references :tag, null: false, foreign_key: true
t.references :fasp_provider, null: false, foreign_key: true
t.integer :rank, null: false
t.string :language, null: false
# Rows default to not allowed unless explicitly flagged.
t.boolean :allowed, null: false, default: false
t.timestamps
end
end
end

View File

@ -0,0 +1,15 @@
# frozen_string_literal: true
# Creates the table that stores link (preview card) trend rankings ingested
# from FASP providers.
class CreateFaspPreviewCardTrends < ActiveRecord::Migration[8.0]
def change
create_table :fasp_preview_card_trends do |t|
# NOTE(review): both foreign keys are created without an `on_delete`
# action, so deleting a referenced preview card or provider is presumably
# rejected by the database while trend rows still point at it -- confirm
# this is intended.
t.references :preview_card, null: false, foreign_key: true
t.references :fasp_provider, null: false, foreign_key: true
t.integer :rank, null: false
t.string :language, null: false
# Rows default to not allowed unless explicitly flagged.
t.boolean :allowed, null: false, default: false
t.timestamps
end
end
end

View File

@ -0,0 +1,15 @@
# frozen_string_literal: true
# Creates the table that stores post (status) trend rankings ingested from
# FASP providers.
class CreateFaspStatusTrends < ActiveRecord::Migration[8.0]
def change
create_table :fasp_status_trends do |t|
# NOTE(review): both foreign keys are created without an `on_delete`
# action, so deleting a referenced status or provider is presumably
# rejected by the database while trend rows still point at it -- confirm
# this is intended.
t.references :status, null: false, foreign_key: true
t.references :fasp_provider, null: false, foreign_key: true
t.integer :rank, null: false
t.string :language, null: false
# Rows default to not allowed unless explicitly flagged.
t.boolean :allowed, null: false, default: false
t.timestamps
end
end
end

View File

@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[8.0].define(version: 2025_01_29_144813) do
ActiveRecord::Schema[8.0].define(version: 2025_02_26_085011) do
# These are extensions that must be enabled in order to support this database
enable_extension "pg_catalog.plpgsql"
@ -464,6 +464,18 @@ ActiveRecord::Schema[8.0].define(version: 2025_01_29_144813) do
t.index ["fasp_provider_id"], name: "index_fasp_debug_callbacks_on_fasp_provider_id"
end
create_table "fasp_preview_card_trends", force: :cascade do |t|
t.bigint "preview_card_id", null: false
t.bigint "fasp_provider_id", null: false
t.integer "rank", null: false
t.string "language", null: false
t.boolean "allowed", default: false, null: false
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["fasp_provider_id"], name: "index_fasp_preview_card_trends_on_fasp_provider_id"
t.index ["preview_card_id"], name: "index_fasp_preview_card_trends_on_preview_card_id"
end
create_table "fasp_providers", force: :cascade do |t|
t.boolean "confirmed", default: false, null: false
t.string "name", null: false
@ -481,6 +493,18 @@ ActiveRecord::Schema[8.0].define(version: 2025_01_29_144813) do
t.index ["base_url"], name: "index_fasp_providers_on_base_url", unique: true
end
create_table "fasp_status_trends", force: :cascade do |t|
t.bigint "status_id", null: false
t.bigint "fasp_provider_id", null: false
t.integer "rank", null: false
t.string "language", null: false
t.boolean "allowed", default: false, null: false
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["fasp_provider_id"], name: "index_fasp_status_trends_on_fasp_provider_id"
t.index ["status_id"], name: "index_fasp_status_trends_on_status_id"
end
create_table "fasp_subscriptions", force: :cascade do |t|
t.string "category", null: false
t.string "subscription_type", null: false
@ -495,6 +519,18 @@ ActiveRecord::Schema[8.0].define(version: 2025_01_29_144813) do
t.index ["fasp_provider_id"], name: "index_fasp_subscriptions_on_fasp_provider_id"
end
create_table "fasp_tag_trends", force: :cascade do |t|
t.bigint "tag_id", null: false
t.bigint "fasp_provider_id", null: false
t.integer "rank", null: false
t.string "language", null: false
t.boolean "allowed", default: false, null: false
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["fasp_provider_id"], name: "index_fasp_tag_trends_on_fasp_provider_id"
t.index ["tag_id"], name: "index_fasp_tag_trends_on_tag_id"
end
create_table "favourites", force: :cascade do |t|
t.datetime "created_at", precision: nil, null: false
t.datetime "updated_at", precision: nil, null: false
@ -1337,7 +1373,13 @@ ActiveRecord::Schema[8.0].define(version: 2025_01_29_144813) do
add_foreign_key "email_domain_blocks", "email_domain_blocks", column: "parent_id", on_delete: :cascade
add_foreign_key "fasp_backfill_requests", "fasp_providers"
add_foreign_key "fasp_debug_callbacks", "fasp_providers"
add_foreign_key "fasp_preview_card_trends", "fasp_providers"
add_foreign_key "fasp_preview_card_trends", "preview_cards"
add_foreign_key "fasp_status_trends", "fasp_providers"
add_foreign_key "fasp_status_trends", "statuses"
add_foreign_key "fasp_subscriptions", "fasp_providers"
add_foreign_key "fasp_tag_trends", "fasp_providers"
add_foreign_key "fasp_tag_trends", "tags"
add_foreign_key "favourites", "accounts", name: "fk_5eb6c2b873", on_delete: :cascade
add_foreign_key "favourites", "statuses", name: "fk_b0e856845e", on_delete: :cascade
add_foreign_key "featured_tags", "accounts", on_delete: :cascade

View File

@ -0,0 +1,9 @@
# frozen_string_literal: true
# Test fabricator for Fasp::PreviewCardTrend: rank 1, English, not allowed.
Fabricator('Fasp::PreviewCardTrend') do
# Attributes without a value; presumably built from the fabricators of the
# same name -- confirm that `fasp_provider` resolves to a Fasp::Provider.
preview_card
fasp_provider
rank 1
language 'en'
allowed false
end

View File

@ -0,0 +1,9 @@
# frozen_string_literal: true
# Test fabricator for Fasp::StatusTrend: rank 1, English, not allowed.
Fabricator('Fasp::StatusTrend') do
# Attributes without a value; presumably built from the fabricators of the
# same name -- confirm that `fasp_provider` resolves to a Fasp::Provider.
status
fasp_provider
rank 1
language 'en'
allowed false
end

View File

@ -0,0 +1,9 @@
# frozen_string_literal: true
# Test fabricator for Fasp::TagTrend: rank 1, English, not allowed.
Fabricator('Fasp::TagTrend') do
# Attributes without a value; presumably built from the fabricators of the
# same name -- confirm that `fasp_provider` resolves to a Fasp::Provider.
tag
fasp_provider
rank 1
language 'en'
allowed false
end

View File

@ -0,0 +1,7 @@
# frozen_string_literal: true
require 'rails_helper'
# Placeholder spec: it contains no examples yet and only registers a pending
# entry naming this file.
RSpec.describe Fasp::PreviewCardTrend do
pending "add some examples to (or delete) #{__FILE__}"
end

View File

@ -0,0 +1,7 @@
# frozen_string_literal: true
require 'rails_helper'
# Placeholder spec: it contains no examples yet and only registers a pending
# entry naming this file.
RSpec.describe Fasp::StatusTrend do
pending "add some examples to (or delete) #{__FILE__}"
end

View File

@ -0,0 +1,7 @@
# frozen_string_literal: true
require 'rails_helper'
# Placeholder spec: it contains no examples yet and only registers a pending
# entry naming this file.
RSpec.describe Fasp::TagTrend do
pending "add some examples to (or delete) #{__FILE__}"
end