Jonny Saunders 2025-05-03 03:46:39 -07:00 committed by GitHub
commit 89064976a4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 311 additions and 127 deletions

View File

@@ -110,3 +110,17 @@ FETCH_REPLIES_MAX_SINGLE=500
# Max number of replies Collection pages to fetch - total
FETCH_REPLIES_MAX_PAGES=500
# Account Backfill Behavior
# --------------------------
# When the first person from your instance follows a remote account,
# backfill their most recent n statuses.
# (default: true if unset, set explicitly to ``false`` to disable)
ACCOUNT_BACKFILL_ENABLED=true
# Max statuses to fetch when backfilling a new account
ACCOUNT_BACKFILL_MAX_STATUSES=1000
# Max number of replies Collection pages to fetch
ACCOUNT_BACKFILL_MAX_PAGES=200

View File

@@ -203,6 +203,85 @@ module JsonLdHelper
end
end
# Iterate through the pages of an activitypub collection,
# returning the collected items and the number of pages that were fetched.
#
# @param collection_or_uri [String, Hash]
# either the URI or an already-fetched AP object
# @param max_pages [Integer, nil]
# Max pages to fetch; if nil, fetch until there are no more pages
# @param max_items [Integer, nil]
# Max items to fetch; if nil, fetch until there are no more items
# @param reference_uri [String, nil]
# If not nil, a URI to compare against the collection URI.
# If the host of the collection URI does not match the host of the
# reference URI, do not fetch the collection page.
# @param on_behalf_of [Account, nil]
# Sign the request on behalf of the Account, if not nil
# @return [Array<Array<Hash>, Integer>, nil]
# The collection items and the number of pages fetched
def collection_items(collection_or_uri, max_pages: 1, max_items: nil, reference_uri: nil, on_behalf_of: nil)
collection = fetch_collection(collection_or_uri, reference_uri: reference_uri, on_behalf_of: on_behalf_of)
return unless collection.is_a?(Hash)
collection = fetch_collection(collection['first'], reference_uri: reference_uri, on_behalf_of: on_behalf_of) if collection['first'].present?
return unless collection.is_a?(Hash)
items = []
n_pages = 1
while collection.is_a?(Hash)
items.concat(as_array(collection_page_items(collection)))
break if !max_items.nil? && items.size >= max_items
break if !max_pages.nil? && n_pages >= max_pages
collection = collection['next'].present? ? fetch_collection(collection['next'], reference_uri: reference_uri, on_behalf_of: on_behalf_of) : nil
n_pages += 1
end
[items, n_pages]
end
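
As a usage sketch (the class name and arguments below are hypothetical, not part of this change), a service that includes JsonLdHelper could walk a remote outbox like this:

class ExampleOutboxReader
  include JsonLdHelper

  # Collect up to 200 items across at most 10 outbox pages, signing
  # requests on behalf of a local account. collection_items returns nil
  # when the collection cannot be fetched, so guard before destructuring.
  def recent_items(outbox_url, local_account)
    result = collection_items(
      outbox_url,
      max_pages: 10,
      max_items: 200,
      on_behalf_of: local_account
    )
    return [] if result.nil?

    items, pages_fetched = result
    Rails.logger.debug { "Backfill sketch: #{items.size} items from #{pages_fetched} page(s)" }
    items
  end
end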
def collection_page_items(collection)
case collection['type']
when 'Collection', 'CollectionPage'
collection['items']
when 'OrderedCollection', 'OrderedCollectionPage'
collection['orderedItems']
end
end
# Fetch a single collection page
# To get the whole collection, use collection_items
#
# @param collection_or_uri [String, Hash]
# @param reference_uri [String, nil]
# If not nil, a URI to compare against the collection URI.
# If the host of the collection URI does not match the host of the
# reference URI, do not fetch the collection page.
# @param on_behalf_of [Account, nil]
# Sign the request on behalf of the Account, if not nil
# @return [Hash, nil]
def fetch_collection(collection_or_uri, reference_uri: nil, on_behalf_of: nil)
return collection_or_uri if collection_or_uri.is_a?(Hash)
return if !reference_uri.nil? && non_matching_uri_hosts?(reference_uri, collection_or_uri)
# NOTE: For backward compatibility reasons, Mastodon signs outgoing
# queries incorrectly by default.
#
# While this is relevant for all URLs with query strings, this is
# the only code path where this happens in practice.
#
# Therefore, retry with correct signatures if this fails.
begin
fetch_resource_without_id_validation(collection_or_uri, on_behalf_of, raise_on_error: :temporary)
rescue Mastodon::UnexpectedResponseError => e
raise unless e.response && e.response.code == 401 && Addressable::URI.parse(collection_or_uri).query.present?
fetch_resource_without_id_validation(collection_or_uri, on_behalf_of, raise_on_error: :temporary, request_options: { omit_query_string: false })
end
end
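
A brief sketch of the reference_uri guard (hosts hypothetical): when the collection lives on a different host than the reference URI, fetch_collection returns nil without making a request.

class ExampleHostGuard
  include JsonLdHelper

  # Same host as the reference account, so the page is fetched.
  def same_host_page
    fetch_collection('https://example.com/users/alice/followers?page=1',
                     reference_uri: 'https://example.com/users/alice')
  end

  # Host mismatch: returns nil and no HTTP request is made.
  def cross_host_page
    fetch_collection('https://elsewhere.test/users/alice/followers?page=1',
                     reference_uri: 'https://example.com/users/alice')
  end
end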
def valid_activitypub_content_type?(response)
return true if response.mime_type == 'application/activity+json'

View File

@@ -32,6 +32,7 @@ class FollowRequest < ApplicationRecord
validates :languages, language: true
def authorize!
is_first_follow = first_follow?
follow = account.follow!(target_account, reblogs: show_reblogs, notify: notify, languages: languages, uri: uri, bypass_limit: true)
if account.local?
@@ -40,6 +41,7 @@ class FollowRequest < ApplicationRecord
MergeWorker.push_bulk(List.where(account: account).joins(:list_accounts).where(list_accounts: { account_id: target_account.id }).pluck(:id)) do |list_id|
[target_account.id, list_id, 'list']
end
ActivityPub::AccountBackfillWorker.perform_async(target_account.id) if is_first_follow && ActivityPub::AccountBackfillService::ENABLED
end
destroy! destroy!
@@ -51,6 +53,10 @@ class FollowRequest < ApplicationRecord
false # Force uri_for to use uri attribute
end
def first_follow?
!target_account.followers.local.exists?
end
before_validation :set_uri, only: :create
after_commit :invalidate_follow_recommendations_cache

View File

@@ -0,0 +1,53 @@
# frozen_string_literal: true
class ActivityPub::AccountBackfillService < BaseService
include JsonLdHelper
ENABLED = ENV['ACCOUNT_BACKFILL_ENABLED'].nil? || ENV['ACCOUNT_BACKFILL_ENABLED'] == 'true'
MAX_STATUSES = (ENV['ACCOUNT_BACKFILL_MAX_STATUSES'] || 1000).to_i
MAX_PAGES = (ENV['ACCOUNT_BACKFILL_MAX_PAGES'] || 200).to_i
def call(account, on_behalf_of: nil, request_id: nil)
return unless ENABLED
@account = account
return if @account.nil? || @account.outbox_url.nil?
@items, = collection_items(@account.outbox_url, max_items: MAX_STATUSES, max_pages: MAX_PAGES, on_behalf_of: on_behalf_of)
return if @items.nil?
@items = filter_items(@items)
on_behalf_of_id = on_behalf_of&.id
FetchReplyWorker.push_bulk(@items) do |status_uri_or_body|
if status_uri_or_body.is_a?(Hash) && status_uri_or_body.key?('object') && status_uri_or_body.key?('id')
# Re-add the minimally-acceptable @context, which gets stripped because this object comes inside a collection
status_uri_or_body['@context'] = ActivityPub::TagManager::CONTEXT unless status_uri_or_body.key?('@context')
[status_uri_or_body['id'], { prefetched_body: status_uri_or_body, request_id: request_id, on_behalf_of: on_behalf_of_id }]
else
[status_uri_or_body, { request_id: request_id, on_behalf_of: on_behalf_of_id }]
end
end
@items
end
private
# Reject any non-public statuses.
# Since our request may have been signed on behalf of the follower,
# we may have received followers-only statuses.
#
# Formally, a followers-only status is addressed to the account's followers collection.
# We were not in that collection at the time that the post was made,
# so followers-only statuses fetched by backfilling are not addressed to us.
# Public and unlisted statuses are sent to the ActivityStreams "Public" entity.
# We are part of the public, so those posts *are* addressed to us.
#
# @param items [Array<Hash>]
# @return [Array<Hash>]
def filter_items(items)
allowed = [:public, :unlisted]
items.filter { |item| item.is_a?(String) || allowed.include?(ActivityPub::Parser::StatusParser.new(item).visibility) }
end
end
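
A worked example of the addressing rule above (URIs hypothetical): a note addressed to the ActivityStreams Public collection is kept, a note addressed only to the author's followers is dropped, and bare URI strings always pass through.

public_note = {
  'id' => 'https://example.com/notes/1',
  'type' => 'Note',
  'to' => ['https://www.w3.org/ns/activitystreams#Public'],
  'cc' => ['https://example.com/users/alice/followers'],
}

followers_only_note = {
  'id' => 'https://example.com/notes/2',
  'type' => 'Note',
  'to' => ['https://example.com/users/alice/followers'],
}

# filter_items is a private helper, so it is exercised via #send here
# purely for illustration (e.g. from a Rails console).
service = ActivityPub::AccountBackfillService.new
service.send(:filter_items, [public_note, followers_only_note, 'https://example.com/notes/3'])
# => keeps public_note and the bare URI string; the followers-only note is
#    dropped because its parsed visibility is neither :public nor :unlisted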

View File

@@ -12,30 +12,12 @@ class ActivityPub::FetchFeaturedCollectionService < BaseService
return unless supported_context?(@json)
- process_items(collection_items(@json))
+ @items, = collection_items(@json, max_pages: 1, reference_uri: @account.uri, on_behalf_of: local_follower)
+ process_items(@items)
end
private
def collection_items(collection)
collection = fetch_collection(collection['first']) if collection['first'].present?
return unless collection.is_a?(Hash)
case collection['type']
when 'Collection', 'CollectionPage'
as_array(collection['items'])
when 'OrderedCollection', 'OrderedCollectionPage'
as_array(collection['orderedItems'])
end
end
def fetch_collection(collection_or_uri)
return collection_or_uri if collection_or_uri.is_a?(Hash)
return if non_matching_uri_hosts?(@account.uri, collection_or_uri)
fetch_resource_without_id_validation(collection_or_uri, local_follower, raise_on_error: :temporary)
end
def process_items(items)
return if items.nil?

View File

@@ -11,43 +11,12 @@ class ActivityPub::FetchFeaturedTagsCollectionService < BaseService
return unless supported_context?(@json)
- process_items(collection_items(@json))
+ @items, = collection_items(@json, max_items: FeaturedTag::LIMIT, reference_uri: @account.uri, on_behalf_of: local_follower)
+ process_items(@items)
end
private
def collection_items(collection)
all_items = []
collection = fetch_collection(collection['first']) if collection['first'].present?
while collection.is_a?(Hash)
items = case collection['type']
when 'Collection', 'CollectionPage'
collection['items']
when 'OrderedCollection', 'OrderedCollectionPage'
collection['orderedItems']
end
break if items.blank?
all_items.concat(items)
break if all_items.size >= FeaturedTag::LIMIT
collection = collection['next'].present? ? fetch_collection(collection['next']) : nil
end
all_items
end
def fetch_collection(collection_or_uri)
return collection_or_uri if collection_or_uri.is_a?(Hash)
return if non_matching_uri_hosts?(@account.uri, collection_or_uri)
fetch_resource_without_id_validation(collection_or_uri, local_follower, raise_on_error: :temporary)
end
def process_items(items)
names = items.filter_map { |item| item['type'] == 'Hashtag' && item['name']&.delete_prefix('#') }.take(FeaturedTag::LIMIT)
tags = names.index_by { |name| HashtagNormalizer.new.normalize(name) }

View File

@@ -11,6 +11,9 @@ class ActivityPub::FetchRemoteStatusService < BaseService
def call(uri, prefetched_body: nil, on_behalf_of: nil, expected_actor_uri: nil, request_id: nil)
return if domain_not_allowed?(uri)
# load the account if given as an ID
on_behalf_of = Account.find(on_behalf_of) unless on_behalf_of.nil? || on_behalf_of.is_a?(Account)
@request_id = request_id || "#{Time.now.utc.to_i}-status-#{uri}"
@json = if prefetched_body.nil?
fetch_status(uri, true, on_behalf_of)

View File

@@ -8,9 +8,13 @@ class ActivityPub::FetchRepliesService < BaseService
def call(reference_uri, collection_or_uri, max_pages: 1, allow_synchronous_requests: true, request_id: nil)
@reference_uri = reference_uri
- @allow_synchronous_requests = allow_synchronous_requests
- @items, n_pages = collection_items(collection_or_uri, max_pages: max_pages)
+ return if !allow_synchronous_requests && !collection_or_uri.is_a?(Hash)
+ # if given a prefetched collection while forbidding synchronous requests,
+ # process it and return without fetching additional pages
+ max_pages = 1 if !allow_synchronous_requests && collection_or_uri.is_a?(Hash)
+ @items, n_pages = collection_items(collection_or_uri, max_pages: max_pages, max_items: MAX_REPLIES, reference_uri: @reference_uri)
return if @items.nil?
@items = filter_replies(@items)
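
As a usage sketch (status and collection URIs hypothetical), the reworked guard means a prefetched replies collection is still processed when synchronous requests are disallowed, limited to that single page, while a bare URI under the same restriction returns immediately:

# A prefetched replies collection delivered alongside a remote status.
prefetched_replies = {
  'type' => 'Collection',
  'items' => %w(https://example.com/statuses/1 https://example.com/statuses/2),
}

# Processes only the prefetched page; no synchronous fetches are made here.
ActivityPub::FetchRepliesService.new.call(
  'https://example.com/statuses/root',
  prefetched_replies,
  allow_synchronous_requests: false
)

# With only a URI and synchronous requests disallowed, the service returns
# before fetching anything.
ActivityPub::FetchRepliesService.new.call(
  'https://example.com/statuses/root',
  'https://example.com/statuses/root/replies',
  allow_synchronous_requests: false
)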
@@ -21,58 +25,6 @@ class ActivityPub::FetchRepliesService < BaseService
private
def collection_items(collection_or_uri, max_pages: 1)
collection = fetch_collection(collection_or_uri)
return unless collection.is_a?(Hash)
collection = fetch_collection(collection['first']) if collection['first'].present?
return unless collection.is_a?(Hash)
items = []
n_pages = 1
while collection.is_a?(Hash)
items.concat(as_array(collection_page_items(collection)))
break if items.size >= MAX_REPLIES
break if n_pages >= max_pages
collection = collection['next'].present? ? fetch_collection(collection['next']) : nil
n_pages += 1
end
[items, n_pages]
end
def collection_page_items(collection)
case collection['type']
when 'Collection', 'CollectionPage'
collection['items']
when 'OrderedCollection', 'OrderedCollectionPage'
collection['orderedItems']
end
end
def fetch_collection(collection_or_uri)
return collection_or_uri if collection_or_uri.is_a?(Hash)
return unless @allow_synchronous_requests
return if non_matching_uri_hosts?(@reference_uri, collection_or_uri)
# NOTE: For backward compatibility reasons, Mastodon signs outgoing
# queries incorrectly by default.
#
# While this is relevant for all URLs with query strings, this is
# the only code path where this happens in practice.
#
# Therefore, retry with correct signatures if this fails.
begin
fetch_resource_without_id_validation(collection_or_uri, nil, raise_on_error: :temporary)
rescue Mastodon::UnexpectedResponseError => e
raise unless e.response && e.response.code == 401 && Addressable::URI.parse(collection_or_uri).query.present?
fetch_resource_without_id_validation(collection_or_uri, nil, raise_on_error: :temporary, request_options: { omit_query_string: false })
end
end
def filter_replies(items)
# Only fetch replies to the same server as the original status to avoid
# amplification attacks.

View File

@@ -63,10 +63,10 @@ class ActivityPub::SynchronizeFollowersService < BaseService
# Only returns true if the whole collection has been processed
def process_collection!(collection_uri, max_pages: MAX_COLLECTION_PAGES)
- collection = fetch_collection(collection_uri)
+ collection = fetch_collection(collection_uri, reference_uri: @account.uri)
return false unless collection.is_a?(Hash)
- collection = fetch_collection(collection['first']) if collection['first'].present?
+ collection = fetch_collection(collection['first'], reference_uri: @account.uri) if collection['first'].present?
while collection.is_a?(Hash)
process_page!(as_array(collection_page_items(collection)))
@@ -81,20 +81,4 @@ class ActivityPub::SynchronizeFollowersService < BaseService
false
end
def collection_page_items(collection)
case collection['type']
when 'Collection', 'CollectionPage'
collection['items']
when 'OrderedCollection', 'OrderedCollectionPage'
collection['orderedItems']
end
end
def fetch_collection(collection_or_uri)
return collection_or_uri if collection_or_uri.is_a?(Hash)
return if non_matching_uri_hosts?(@account.uri, collection_or_uri)
fetch_resource_without_id_validation(collection_or_uri, nil, raise_on_error: :temporary)
end
end

View File

@@ -0,0 +1,13 @@
# frozen_string_literal: true
class ActivityPub::AccountBackfillWorker
include Sidekiq::Worker
include ExponentialBackoff
def perform(account_id, options = {})
account = Account.find(account_id)
return if account.local?
ActivityPub::AccountBackfillService.new.call(account, **options.deep_symbolize_keys)
end
end
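
For context, a small sketch of how the worker might be enqueued (the account variable is hypothetical); options round-trip through Sidekiq's JSON serialization, hence the string keys here and the deep_symbolize_keys call above:

# Enqueued from FollowRequest#authorize! when a remote account gains its
# first local follower.
ActivityPub::AccountBackfillWorker.perform_async(remote_account.id)

# Optional keyword arguments for AccountBackfillService#call can be passed
# as a JSON-friendly hash; they are symbolized again inside #perform.
ActivityPub::AccountBackfillWorker.perform_async(
  remote_account.id,
  { 'request_id' => 'backfill-1234' }
)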

View File

@@ -7,6 +7,6 @@ class FetchReplyWorker
sidekiq_options queue: 'pull', retry: 3
def perform(child_url, options = {})
- FetchRemoteStatusService.new.call(child_url, **options.deep_symbolize_keys)
+ FetchRemoteStatusService.new.call(child_url, **options.symbolize_keys)
end
end

View File

@@ -20,17 +20,19 @@ RSpec.describe FollowRequest do
end
end
- it 'calls Account#follow!, MergeWorker.perform_async, and #destroy!' do
+ it 'calls Account#follow!, MergeWorker.perform_async, ActivityPub::AccountBackfillWorker, and #destroy!' do
allow(account).to receive(:follow!) do
account.active_relationships.create!(target_account: target_account)
end
allow(MergeWorker).to receive(:perform_async)
allow(ActivityPub::AccountBackfillWorker).to receive(:perform_async)
allow(follow_request).to receive(:destroy!)
follow_request.authorize!
expect(account).to have_received(:follow!).with(target_account, reblogs: true, notify: false, uri: follow_request.uri, languages: nil, bypass_limit: true)
expect(MergeWorker).to have_received(:perform_async).with(target_account.id, account.id, 'home')
expect(ActivityPub::AccountBackfillWorker).to have_received(:perform_async).with(target_account.id)
expect(follow_request).to have_received(:destroy!)
end
@@ -47,6 +49,21 @@
target = follow_request.target_account
expect(follow_request.account.muting_reblogs?(target)).to be true
end
context 'when subsequent follow requests are made' do
before do
second_account = Fabricate(:account)
second_account.follow!(target_account)
end
it 'does not call ActivityPub::AccountBackfillWorker' do
allow(ActivityPub::AccountBackfillWorker).to receive(:perform_async)
follow_request.authorize!
expect(ActivityPub::AccountBackfillWorker).to_not have_received(:perform_async)
end
end
end
describe '#reject!' do

View File

@@ -0,0 +1,112 @@
# frozen_string_literal: true
require 'rails_helper'
RSpec.describe ActivityPub::AccountBackfillService do
subject { described_class.new }
before do
stub_const('ActivityPub::AccountBackfillService::ENABLED', true)
end
let!(:account) { Fabricate(:account, domain: 'other.com', outbox_url: 'http://other.com/alice/outbox') }
let!(:outbox) do
{
'@context': 'https://www.w3.org/ns/activitystreams',
id: 'http://other.com/alice/outbox',
type: 'OrderedCollection',
first: 'http://other.com/alice/outbox?page=true',
}.with_indifferent_access
end
let!(:items) do
[
{
'@context': 'https://www.w3.org/ns/activitystreams',
id: 'https://other.com/alice/1234',
to: ['https://www.w3.org/ns/activitystreams#Public'],
cc: ['https://other.com/alice/followers'],
type: 'Note',
content: 'Lorem ipsum',
attributedTo: 'http://other.com/alice',
},
'https://other.com/alice/5678',
]
end
let!(:outbox_page) do
{
'@context': 'https://www.w3.org/ns/activitystreams',
id: 'http://other.com/alice/outbox?page=true',
type: 'OrderedCollectionPage',
orderedItems: items,
}
end
describe '#call' do
before do
stub_request(:get, 'http://other.com/alice/outbox').to_return(status: 200, body: Oj.dump(outbox), headers: { 'Content-Type': 'application/activity+json' })
stub_request(:get, 'http://other.com/alice/outbox?page=true').to_return(status: 200, body: Oj.dump(outbox_page), headers: { 'Content-Type': 'application/activity+json' })
end
it 'fetches the items in the outbox' do
allow(FetchReplyWorker).to receive(:push_bulk)
got_items = subject.call(account)
expect(got_items[0].deep_symbolize_keys).to eq(items[0])
expect(got_items[1]).to eq(items[1])
expect(FetchReplyWorker).to have_received(:push_bulk).with([items[0].stringify_keys, items[1]])
end
context 'with followers-only and private statuses' do
let!(:items) do
[
{
'@context': 'https://www.w3.org/ns/activitystreams',
id: 'https://other.com/alice/public',
type: 'Note',
to: ['https://www.w3.org/ns/activitystreams#Public'],
cc: ['https://other.com/alice/followers'],
content: 'Lorem ipsum',
attributedTo: 'http://other.com/alice',
},
{
'@context': 'https://www.w3.org/ns/activitystreams',
id: 'https://other.com/alice/unlisted',
to: ['https://other.com/alice/followers'],
cc: ['https://www.w3.org/ns/activitystreams#Public'],
type: 'Note',
content: 'Lorem ipsum',
attributedTo: 'http://other.com/alice',
},
{
'@context': 'https://www.w3.org/ns/activitystreams',
id: 'https://other.com/alice/followers-only',
to: ['https://other.com/alice/followers'],
type: 'Note',
content: 'Lorem ipsum',
attributedTo: 'http://other.com/alice',
},
{
'@context': 'https://www.w3.org/ns/activitystreams',
id: 'https://other.com/alice/dm',
to: ['https://other.com/alice/followers'],
type: 'Note',
content: 'Lorem ipsum',
attributedTo: 'http://other.com/alice',
},
]
end
it 'only processes public and unlisted statuses' do
allow(FetchReplyWorker).to receive(:push_bulk)
got_items = subject.call(account)
expect(got_items.length).to eq(2)
expect(got_items[0].deep_symbolize_keys).to eq(items[0])
expect(got_items[1].deep_symbolize_keys).to eq(items[1])
expect(FetchReplyWorker).to have_received(:push_bulk).with([items[0].stringify_keys, items[1].stringify_keys])
end
end
end
end