This commit is contained in:
Jonny Saunders 2025-05-03 11:04:45 +00:00 committed by GitHub
commit 06da0727a5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 91 additions and 125 deletions

View File

@ -203,6 +203,85 @@ module JsonLdHelper
end end
end end
# Iterate through the pages of an activitypub collection,
# returning the collected items and the number of pages that were fetched.
#
# @param collection_or_uri [String, Hash]
# either the URI or an already-fetched AP object
# @param max_pages [Integer, nil]
# Max pages to fetch, if nil, fetch until no more pages
# @param max_items [Integer, nil]
# Max items to fetch, if nil, fetch until no more items
# @param reference_uri [String, nil]
# If not nil, a URI to compare to the collection URI.
# If the host of the collection URI does not match the reference URI,
# do not fetch the collection page.
# @param on_behalf_of [Account, nil]
# Sign the request on behalf of the Account, if not nil
# @return [Array<Array<Hash>, Integer>, nil]
# The collection items and the number of pages fetched
def collection_items(collection_or_uri, max_pages: 1, max_items: nil, reference_uri: nil, on_behalf_of: nil)
collection = fetch_collection(collection_or_uri, reference_uri: reference_uri, on_behalf_of: on_behalf_of)
return unless collection.is_a?(Hash)
collection = fetch_collection(collection['first'], reference_uri: reference_uri, on_behalf_of: on_behalf_of) if collection['first'].present?
return unless collection.is_a?(Hash)
items = []
n_pages = 1
while collection.is_a?(Hash)
items.concat(as_array(collection_page_items(collection)))
break if !max_items.nil? && items.size >= max_items
break if !max_pages.nil? && n_pages >= max_pages
collection = collection['next'].present? ? fetch_collection(collection['next'], reference_uri: reference_uri, on_behalf_of: on_behalf_of) : nil
n_pages += 1
end
[items, n_pages]
end
def collection_page_items(collection)
case collection['type']
when 'Collection', 'CollectionPage'
collection['items']
when 'OrderedCollection', 'OrderedCollectionPage'
collection['orderedItems']
end
end
# Fetch a single collection page
# To get the whole collection, use collection_items
#
# @param collection_or_uri [String, Hash]
# @param reference_uri [String, nil]
# If not nil, a URI to compare to the collection URI.
# If the host of the collection URI does not match the reference URI,
# do not fetch the collection page.
# @param on_behalf_of [Account, nil]
# Sign the request on behalf of the Account, if not nil
# @return [Hash, nil]
def fetch_collection(collection_or_uri, reference_uri: nil, on_behalf_of: nil)
return collection_or_uri if collection_or_uri.is_a?(Hash)
return if !reference_uri.nil? && non_matching_uri_hosts?(reference_uri, collection_or_uri)
# NOTE: For backward compatibility reasons, Mastodon signs outgoing
# queries incorrectly by default.
#
# While this is relevant for all URLs with query strings, this is
# the only code path where this happens in practice.
#
# Therefore, retry with correct signatures if this fails.
begin
fetch_resource_without_id_validation(collection_or_uri, on_behalf_of, raise_on_error: :temporary)
rescue Mastodon::UnexpectedResponseError => e
raise unless e.response && e.response.code == 401 && Addressable::URI.parse(collection_or_uri).query.present?
fetch_resource_without_id_validation(collection_or_uri, on_behalf_of, raise_on_error: :temporary, request_options: { omit_query_string: false })
end
end
def valid_activitypub_content_type?(response) def valid_activitypub_content_type?(response)
return true if response.mime_type == 'application/activity+json' return true if response.mime_type == 'application/activity+json'

View File

@ -12,30 +12,12 @@ class ActivityPub::FetchFeaturedCollectionService < BaseService
return unless supported_context?(@json) return unless supported_context?(@json)
process_items(collection_items(@json)) @items, = collection_items(@json, max_pages: 1, reference_uri: @account.uri, on_behalf_of: local_follower)
process_items(@items)
end end
private private
def collection_items(collection)
collection = fetch_collection(collection['first']) if collection['first'].present?
return unless collection.is_a?(Hash)
case collection['type']
when 'Collection', 'CollectionPage'
as_array(collection['items'])
when 'OrderedCollection', 'OrderedCollectionPage'
as_array(collection['orderedItems'])
end
end
def fetch_collection(collection_or_uri)
return collection_or_uri if collection_or_uri.is_a?(Hash)
return if non_matching_uri_hosts?(@account.uri, collection_or_uri)
fetch_resource_without_id_validation(collection_or_uri, local_follower, raise_on_error: :temporary)
end
def process_items(items) def process_items(items)
return if items.nil? return if items.nil?

View File

@ -11,43 +11,12 @@ class ActivityPub::FetchFeaturedTagsCollectionService < BaseService
return unless supported_context?(@json) return unless supported_context?(@json)
process_items(collection_items(@json)) @items, = collection_items(@json, max_items: FeaturedTag::LIMIT, reference_uri: @account.uri, on_behalf_of: local_follower)
process_items(@items)
end end
private private
def collection_items(collection)
all_items = []
collection = fetch_collection(collection['first']) if collection['first'].present?
while collection.is_a?(Hash)
items = case collection['type']
when 'Collection', 'CollectionPage'
collection['items']
when 'OrderedCollection', 'OrderedCollectionPage'
collection['orderedItems']
end
break if items.blank?
all_items.concat(items)
break if all_items.size >= FeaturedTag::LIMIT
collection = collection['next'].present? ? fetch_collection(collection['next']) : nil
end
all_items
end
def fetch_collection(collection_or_uri)
return collection_or_uri if collection_or_uri.is_a?(Hash)
return if non_matching_uri_hosts?(@account.uri, collection_or_uri)
fetch_resource_without_id_validation(collection_or_uri, local_follower, raise_on_error: :temporary)
end
def process_items(items) def process_items(items)
names = items.filter_map { |item| item['type'] == 'Hashtag' && item['name']&.delete_prefix('#') }.take(FeaturedTag::LIMIT) names = items.filter_map { |item| item['type'] == 'Hashtag' && item['name']&.delete_prefix('#') }.take(FeaturedTag::LIMIT)
tags = names.index_by { |name| HashtagNormalizer.new.normalize(name) } tags = names.index_by { |name| HashtagNormalizer.new.normalize(name) }

View File

@ -8,9 +8,13 @@ class ActivityPub::FetchRepliesService < BaseService
def call(reference_uri, collection_or_uri, max_pages: 1, allow_synchronous_requests: true, request_id: nil) def call(reference_uri, collection_or_uri, max_pages: 1, allow_synchronous_requests: true, request_id: nil)
@reference_uri = reference_uri @reference_uri = reference_uri
@allow_synchronous_requests = allow_synchronous_requests return if !allow_synchronous_requests && !collection_or_uri.is_a?(Hash)
@items, n_pages = collection_items(collection_or_uri, max_pages: max_pages) # if given a prefetched collection while forbidding synchronous requests,
# process it and return without fetching additional pages
max_pages = 1 if !allow_synchronous_requests && collection_or_uri.is_a?(Hash)
@items, n_pages = collection_items(collection_or_uri, max_pages: max_pages, max_items: MAX_REPLIES, reference_uri: @reference_uri)
return if @items.nil? return if @items.nil?
@items = filter_replies(@items) @items = filter_replies(@items)
@ -21,58 +25,6 @@ class ActivityPub::FetchRepliesService < BaseService
private private
def collection_items(collection_or_uri, max_pages: 1)
collection = fetch_collection(collection_or_uri)
return unless collection.is_a?(Hash)
collection = fetch_collection(collection['first']) if collection['first'].present?
return unless collection.is_a?(Hash)
items = []
n_pages = 1
while collection.is_a?(Hash)
items.concat(as_array(collection_page_items(collection)))
break if items.size >= MAX_REPLIES
break if n_pages >= max_pages
collection = collection['next'].present? ? fetch_collection(collection['next']) : nil
n_pages += 1
end
[items, n_pages]
end
def collection_page_items(collection)
case collection['type']
when 'Collection', 'CollectionPage'
collection['items']
when 'OrderedCollection', 'OrderedCollectionPage'
collection['orderedItems']
end
end
def fetch_collection(collection_or_uri)
return collection_or_uri if collection_or_uri.is_a?(Hash)
return unless @allow_synchronous_requests
return if non_matching_uri_hosts?(@reference_uri, collection_or_uri)
# NOTE: For backward compatibility reasons, Mastodon signs outgoing
# queries incorrectly by default.
#
# While this is relevant for all URLs with query strings, this is
# the only code path where this happens in practice.
#
# Therefore, retry with correct signatures if this fails.
begin
fetch_resource_without_id_validation(collection_or_uri, nil, raise_on_error: :temporary)
rescue Mastodon::UnexpectedResponseError => e
raise unless e.response && e.response.code == 401 && Addressable::URI.parse(collection_or_uri).query.present?
fetch_resource_without_id_validation(collection_or_uri, nil, raise_on_error: :temporary, request_options: { omit_query_string: false })
end
end
def filter_replies(items) def filter_replies(items)
# Only fetch replies to the same server as the original status to avoid # Only fetch replies to the same server as the original status to avoid
# amplification attacks. # amplification attacks.

View File

@ -63,10 +63,10 @@ class ActivityPub::SynchronizeFollowersService < BaseService
# Only returns true if the whole collection has been processed # Only returns true if the whole collection has been processed
def process_collection!(collection_uri, max_pages: MAX_COLLECTION_PAGES) def process_collection!(collection_uri, max_pages: MAX_COLLECTION_PAGES)
collection = fetch_collection(collection_uri) collection = fetch_collection(collection_uri, reference_uri: @account.uri)
return false unless collection.is_a?(Hash) return false unless collection.is_a?(Hash)
collection = fetch_collection(collection['first']) if collection['first'].present? collection = fetch_collection(collection['first'], reference_uri: @account.uri) if collection['first'].present?
while collection.is_a?(Hash) while collection.is_a?(Hash)
process_page!(as_array(collection_page_items(collection))) process_page!(as_array(collection_page_items(collection)))
@ -81,20 +81,4 @@ class ActivityPub::SynchronizeFollowersService < BaseService
false false
end end
def collection_page_items(collection)
case collection['type']
when 'Collection', 'CollectionPage'
collection['items']
when 'OrderedCollection', 'OrderedCollectionPage'
collection['orderedItems']
end
end
def fetch_collection(collection_or_uri)
return collection_or_uri if collection_or_uri.is_a?(Hash)
return if non_matching_uri_hosts?(@account.uri, collection_or_uri)
fetch_resource_without_id_validation(collection_or_uri, nil, raise_on_error: :temporary)
end
end end