From dd81abf3893e984fde8c9882f7d407e9ef3463c0 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Fri, 2 May 2025 21:10:22 -0700 Subject: [PATCH 1/2] consolidate collection handling in jsonld helper --- app/helpers/json_ld_helper.rb | 79 +++++++++++++++++++ .../fetch_featured_collection_service.rb | 22 +----- .../fetch_featured_tags_collection_service.rb | 35 +------- .../activitypub/fetch_replies_service.rb | 60 ++------------ .../synchronize_followers_service.rb | 20 +---- 5 files changed, 91 insertions(+), 125 deletions(-) diff --git a/app/helpers/json_ld_helper.rb b/app/helpers/json_ld_helper.rb index 65daaa5302..e7fefa28bd 100644 --- a/app/helpers/json_ld_helper.rb +++ b/app/helpers/json_ld_helper.rb @@ -203,6 +203,85 @@ module JsonLdHelper end end + # Iterate through the pages of an activitypub collection, + # returning the collected items and the number of pages that were fetched. + # + # @param collection_or_uri [String, Hash] + # either the URI or an already-fetched AP object + # @param max_pages [Integer, nil] + # Max pages to fetch, if nil, fetch until no more pages + # @param max_items [Integer, nil] + # Max items to fetch, if nil, fetch until no more items + # @param reference_uri [String, nil] + # If not nil, a URI to compare to the collection URI. + # If the host of the collection URI does not match the reference URI, + # do not fetch the collection page. + # @param on_behalf_of [Account, nil] + # Sign the request on behalf of the Account, if not nil + # @return [Array, Integer>, nil] + # The collection items and the number of pages fetched + def collection_items(collection_or_uri, max_pages: 1, max_items: nil, reference_uri: nil, on_behalf_of: nil) + collection = fetch_collection(collection_or_uri, reference_uri: reference_uri, on_behalf_of: on_behalf_of) + return unless collection.is_a?(Hash) + + collection = fetch_collection(collection['first'], reference_uri: reference_uri, on_behalf_of: on_behalf_of) if collection['first'].present? + return unless collection.is_a?(Hash) + + items = [] + n_pages = 1 + while collection.is_a?(Hash) + items.concat(as_array(collection_page_items(collection))) + + break if !max_items.nil? & items.size >= max_items + break if !max_pages.nil? & n_pages >= max_pages + + collection = collection['next'].present? ? fetch_collection(collection['next'], reference_uri: reference_uri, on_behalf_of: on_behalf_of) : nil + n_pages += 1 + end + + [items, n_pages] + end + + def collection_page_items(collection) + case collection['type'] + when 'Collection', 'CollectionPage' + collection['items'] + when 'OrderedCollection', 'OrderedCollectionPage' + collection['orderedItems'] + end + end + + # Fetch a single collection page + # To get the whole collection, use collection_items + # + # @param collection_or_uri [String, Hash] + # @param reference_uri [String, nil] + # If not nil, a URI to compare to the collection URI. + # If the host of the collection URI does not match the reference URI, + # do not fetch the collection page. + # @param on_behalf_of [Account, nil] + # Sign the request on behalf of the Account, if not nil + # @return [Hash, nil] + def fetch_collection(collection_or_uri, reference_uri: nil, on_behalf_of: nil) + return collection_or_uri if collection_or_uri.is_a?(Hash) + return if !reference_uri.nil? & non_matching_uri_hosts?(reference_uri, collection_or_uri) + + # NOTE: For backward compatibility reasons, Mastodon signs outgoing + # queries incorrectly by default. + # + # While this is relevant for all URLs with query strings, this is + # the only code path where this happens in practice. + # + # Therefore, retry with correct signatures if this fails. + begin + fetch_resource_without_id_validation(collection_or_uri, on_behalf_of, raise_on_error: :temporary) + rescue Mastodon::UnexpectedResponseError => e + raise unless e.response && e.response.code == 401 && Addressable::URI.parse(collection_or_uri).query.present? + + fetch_resource_without_id_validation(collection_or_uri, on_behalf_of, raise_on_error: :temporary, request_options: { omit_query_string: false }) + end + end + def valid_activitypub_content_type?(response) return true if response.mime_type == 'application/activity+json' diff --git a/app/services/activitypub/fetch_featured_collection_service.rb b/app/services/activitypub/fetch_featured_collection_service.rb index 25c62f3be6..9d0b1a3717 100644 --- a/app/services/activitypub/fetch_featured_collection_service.rb +++ b/app/services/activitypub/fetch_featured_collection_service.rb @@ -12,30 +12,12 @@ class ActivityPub::FetchFeaturedCollectionService < BaseService return unless supported_context?(@json) - process_items(collection_items(@json)) + @items, = collection_items(@json, max_pages: 1, reference_uri: @account.uri, on_behalf_of: local_follower) + process_items(@items) end private - def collection_items(collection) - collection = fetch_collection(collection['first']) if collection['first'].present? - return unless collection.is_a?(Hash) - - case collection['type'] - when 'Collection', 'CollectionPage' - as_array(collection['items']) - when 'OrderedCollection', 'OrderedCollectionPage' - as_array(collection['orderedItems']) - end - end - - def fetch_collection(collection_or_uri) - return collection_or_uri if collection_or_uri.is_a?(Hash) - return if non_matching_uri_hosts?(@account.uri, collection_or_uri) - - fetch_resource_without_id_validation(collection_or_uri, local_follower, raise_on_error: :temporary) - end - def process_items(items) return if items.nil? diff --git a/app/services/activitypub/fetch_featured_tags_collection_service.rb b/app/services/activitypub/fetch_featured_tags_collection_service.rb index ec2422a075..696b4894d4 100644 --- a/app/services/activitypub/fetch_featured_tags_collection_service.rb +++ b/app/services/activitypub/fetch_featured_tags_collection_service.rb @@ -11,43 +11,12 @@ class ActivityPub::FetchFeaturedTagsCollectionService < BaseService return unless supported_context?(@json) - process_items(collection_items(@json)) + @items, = collection_items(@json, max_items: FeaturedTag::LIMIT, reference_uri: @account.uri, on_behalf_of: local_follower) + process_items(@items) end private - def collection_items(collection) - all_items = [] - - collection = fetch_collection(collection['first']) if collection['first'].present? - - while collection.is_a?(Hash) - items = case collection['type'] - when 'Collection', 'CollectionPage' - collection['items'] - when 'OrderedCollection', 'OrderedCollectionPage' - collection['orderedItems'] - end - - break if items.blank? - - all_items.concat(items) - - break if all_items.size >= FeaturedTag::LIMIT - - collection = collection['next'].present? ? fetch_collection(collection['next']) : nil - end - - all_items - end - - def fetch_collection(collection_or_uri) - return collection_or_uri if collection_or_uri.is_a?(Hash) - return if non_matching_uri_hosts?(@account.uri, collection_or_uri) - - fetch_resource_without_id_validation(collection_or_uri, local_follower, raise_on_error: :temporary) - end - def process_items(items) names = items.filter_map { |item| item['type'] == 'Hashtag' && item['name']&.delete_prefix('#') }.take(FeaturedTag::LIMIT) tags = names.index_by { |name| HashtagNormalizer.new.normalize(name) } diff --git a/app/services/activitypub/fetch_replies_service.rb b/app/services/activitypub/fetch_replies_service.rb index f2e4f45104..30481de789 100644 --- a/app/services/activitypub/fetch_replies_service.rb +++ b/app/services/activitypub/fetch_replies_service.rb @@ -8,9 +8,13 @@ class ActivityPub::FetchRepliesService < BaseService def call(reference_uri, collection_or_uri, max_pages: 1, allow_synchronous_requests: true, request_id: nil) @reference_uri = reference_uri - @allow_synchronous_requests = allow_synchronous_requests + return if !allow_synchronous_requests & !collection_or_uri.is_a?(Hash) - @items, n_pages = collection_items(collection_or_uri, max_pages: max_pages) + # if given a prefetched collection while forbidding synchronous requests, + # process it and return without fetching additional pages + max_pages = 1 if !allow_synchronous_requests & collection_or_uri.is_a?(Hash) + + @items, n_pages = collection_items(collection_or_uri, max_pages: max_pages, max_items: MAX_REPLIES, reference_uri: @reference_uri) return if @items.nil? @items = filter_replies(@items) @@ -21,58 +25,6 @@ class ActivityPub::FetchRepliesService < BaseService private - def collection_items(collection_or_uri, max_pages: 1) - collection = fetch_collection(collection_or_uri) - return unless collection.is_a?(Hash) - - collection = fetch_collection(collection['first']) if collection['first'].present? - return unless collection.is_a?(Hash) - - items = [] - n_pages = 1 - while collection.is_a?(Hash) - items.concat(as_array(collection_page_items(collection))) - - break if items.size >= MAX_REPLIES - break if n_pages >= max_pages - - collection = collection['next'].present? ? fetch_collection(collection['next']) : nil - n_pages += 1 - end - - [items, n_pages] - end - - def collection_page_items(collection) - case collection['type'] - when 'Collection', 'CollectionPage' - collection['items'] - when 'OrderedCollection', 'OrderedCollectionPage' - collection['orderedItems'] - end - end - - def fetch_collection(collection_or_uri) - return collection_or_uri if collection_or_uri.is_a?(Hash) - return unless @allow_synchronous_requests - return if non_matching_uri_hosts?(@reference_uri, collection_or_uri) - - # NOTE: For backward compatibility reasons, Mastodon signs outgoing - # queries incorrectly by default. - # - # While this is relevant for all URLs with query strings, this is - # the only code path where this happens in practice. - # - # Therefore, retry with correct signatures if this fails. - begin - fetch_resource_without_id_validation(collection_or_uri, nil, raise_on_error: :temporary) - rescue Mastodon::UnexpectedResponseError => e - raise unless e.response && e.response.code == 401 && Addressable::URI.parse(collection_or_uri).query.present? - - fetch_resource_without_id_validation(collection_or_uri, nil, raise_on_error: :temporary, request_options: { omit_query_string: false }) - end - end - def filter_replies(items) # Only fetch replies to the same server as the original status to avoid # amplification attacks. diff --git a/app/services/activitypub/synchronize_followers_service.rb b/app/services/activitypub/synchronize_followers_service.rb index fd6fd1b899..e9ac13ef64 100644 --- a/app/services/activitypub/synchronize_followers_service.rb +++ b/app/services/activitypub/synchronize_followers_service.rb @@ -63,10 +63,10 @@ class ActivityPub::SynchronizeFollowersService < BaseService # Only returns true if the whole collection has been processed def process_collection!(collection_uri, max_pages: MAX_COLLECTION_PAGES) - collection = fetch_collection(collection_uri) + collection = fetch_collection(collection_uri, reference_uri: @account.uri) return false unless collection.is_a?(Hash) - collection = fetch_collection(collection['first']) if collection['first'].present? + collection = fetch_collection(collection['first'], reference_uri: @account.uri) if collection['first'].present? while collection.is_a?(Hash) process_page!(as_array(collection_page_items(collection))) @@ -81,20 +81,4 @@ class ActivityPub::SynchronizeFollowersService < BaseService false end - - def collection_page_items(collection) - case collection['type'] - when 'Collection', 'CollectionPage' - collection['items'] - when 'OrderedCollection', 'OrderedCollectionPage' - collection['orderedItems'] - end - end - - def fetch_collection(collection_or_uri) - return collection_or_uri if collection_or_uri.is_a?(Hash) - return if non_matching_uri_hosts?(@account.uri, collection_or_uri) - - fetch_resource_without_id_validation(collection_or_uri, nil, raise_on_error: :temporary) - end end From 890157170ef2954f0852c4964698ec2dd074f2e6 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Fri, 2 May 2025 22:40:42 -0700 Subject: [PATCH 2/2] oh right ruby uses double && for logical and --- app/helpers/json_ld_helper.rb | 6 +++--- app/services/activitypub/fetch_replies_service.rb | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/app/helpers/json_ld_helper.rb b/app/helpers/json_ld_helper.rb index e7fefa28bd..c4ba405d2c 100644 --- a/app/helpers/json_ld_helper.rb +++ b/app/helpers/json_ld_helper.rb @@ -232,8 +232,8 @@ module JsonLdHelper while collection.is_a?(Hash) items.concat(as_array(collection_page_items(collection))) - break if !max_items.nil? & items.size >= max_items - break if !max_pages.nil? & n_pages >= max_pages + break if !max_items.nil? && items.size >= max_items + break if !max_pages.nil? && n_pages >= max_pages collection = collection['next'].present? ? fetch_collection(collection['next'], reference_uri: reference_uri, on_behalf_of: on_behalf_of) : nil n_pages += 1 @@ -264,7 +264,7 @@ module JsonLdHelper # @return [Hash, nil] def fetch_collection(collection_or_uri, reference_uri: nil, on_behalf_of: nil) return collection_or_uri if collection_or_uri.is_a?(Hash) - return if !reference_uri.nil? & non_matching_uri_hosts?(reference_uri, collection_or_uri) + return if !reference_uri.nil? && non_matching_uri_hosts?(reference_uri, collection_or_uri) # NOTE: For backward compatibility reasons, Mastodon signs outgoing # queries incorrectly by default. diff --git a/app/services/activitypub/fetch_replies_service.rb b/app/services/activitypub/fetch_replies_service.rb index 30481de789..d89ad9cd5e 100644 --- a/app/services/activitypub/fetch_replies_service.rb +++ b/app/services/activitypub/fetch_replies_service.rb @@ -8,11 +8,11 @@ class ActivityPub::FetchRepliesService < BaseService def call(reference_uri, collection_or_uri, max_pages: 1, allow_synchronous_requests: true, request_id: nil) @reference_uri = reference_uri - return if !allow_synchronous_requests & !collection_or_uri.is_a?(Hash) + return if !allow_synchronous_requests && !collection_or_uri.is_a?(Hash) # if given a prefetched collection while forbidding synchronous requests, # process it and return without fetching additional pages - max_pages = 1 if !allow_synchronous_requests & collection_or_uri.is_a?(Hash) + max_pages = 1 if !allow_synchronous_requests && collection_or_uri.is_a?(Hash) @items, n_pages = collection_items(collection_or_uri, max_pages: max_pages, max_items: MAX_REPLIES, reference_uri: @reference_uri) return if @items.nil?