Compare commits

...

2 Commits

Author SHA1 Message Date
Jason Parker
5c554b500c
Merge 65f6fba745 into 94bceb8683 2025-07-11 14:05:41 +00:00
Jason Parker
65f6fba745 Add proper support for IDN links. 2024-10-08 18:14:53 -04:00

View File

@@ -2,11 +2,27 @@
module Extractor
# NOTE(review): presumably the maximum allowed domain length in characters
# (253 looks like the DNS limit for a full domain name) — it is not
# referenced in the visible part of this module; confirm where it is used.
MAX_DOMAIN_LENGTH = 253
# Upper bound that is_valid_domain compares the adjusted URL length against.
MAX_URL_LENGTH = 4096
# Make the methods of Twitter::TwitterText::Extractor callable directly on
# this module.
extend Twitter::TwitterText::Extractor
# Methods defined after this point become module functions.
module_function
# Checks whether a URL still fits within MAX_URL_LENGTH once its domain has
# been converted to its ASCII (IDNA) form.
#
# The URL length is increased by however much the domain grows when encoded
# (a domain that shrinks or stays the same leaves the length untouched), and
# by URL_PROTOCOL_LENGTH when no protocol was given.
#
# @param url_length [Integer] length of the URL as it appears in the text
# @param domain [String, nil] domain portion of the URL
# @param protocol [String, nil] protocol portion of the URL, if any
# @return [Boolean] true when the adjusted length is at most MAX_URL_LENGTH;
#   false when the domain is missing or when encoding/measuring it raises
def is_valid_domain(url_length, domain, protocol)
  # A missing domain can never be valid; no need to raise and rescue for it.
  return false unless domain

  encoded_domain = IDN::Idna.toASCII(domain, IDN::Idna::ALLOW_UNASSIGNED)

  # Only growth counts against the limit.
  growth = encoded_domain.length - domain.length
  url_length += growth if growth > 0
  url_length += URL_PROTOCOL_LENGTH unless protocol

  url_length <= MAX_URL_LENGTH
rescue StandardError
  # Any ordinary failure (e.g. the encoder rejecting the domain) means the
  # domain is not valid. StandardError is rescued rather than Exception so
  # that Interrupt, SystemExit and similar still propagate to the caller.
  false
end
def extract_entities_with_indices(text, options = {}, &block)
entities = extract_urls_with_indices(text, options) +
extract_hashtags_with_indices(text, check_url_overlap: false) +