Refactor search query parser and handle nested quote types

This commit is contained in:
Claire 2023-09-01 10:15:27 +02:00
parent 4d9186a48c
commit 32eb0e7744
5 changed files with 43 additions and 47 deletions

View File

@ -1,15 +1,20 @@
# frozen_string_literal: true # frozen_string_literal: true
class SearchQueryParser < Parslet::Parser class SearchQueryParser < Parslet::Parser
rule(:term) { match('[^\s":]').repeat(1).as(:term) } rule(:term) { match('[^\s]').repeat(1).as(:term) }
rule(:quote) { str('"') }
rule(:colon) { str(':') } rule(:colon) { str(':') }
rule(:space) { match('\s').repeat(1) } rule(:space) { match('\s').repeat(1) }
rule(:operator) { (str('+') | str('-')).as(:operator) } rule(:operator) { (str('+') | str('-')).as(:operator) }
rule(:prefix) { term >> colon } rule(:prefix_operator) { str('has') | str('is') | str('language') | str('from') | str('before') | str('after') | str('during') | str('in') }
rule(:shortcode) { (colon >> term >> colon.maybe).as(:shortcode) } rule(:prefix) { prefix_operator.as(:prefix_operator) >> colon }
rule(:phrase) { (quote >> (match('[^\s"]').repeat(1).as(:term) >> space.maybe).repeat >> quote).as(:phrase) } rule(:phrase) do
rule(:clause) { (operator.maybe >> prefix.maybe.as(:prefix) >> (phrase | term | shortcode)).as(:clause) | prefix.as(:clause) | quote.as(:junk) } (str('"') >> match('[^"]').repeat.as(:phrase) >> str('"')) |
(match('[“”„]') >> match('[^“”„]').repeat.as(:phrase) >> match('[“”„]')) |
(str('«') >> match('[^«»]').repeat.as(:phrase) >> str('»')) |
(str('「') >> match('[^「」]').repeat.as(:phrase) >> str('」')) |
(str('《') >> match('[^《》]').repeat.as(:phrase) >> str('》'))
end
rule(:clause) { (operator.maybe >> prefix.maybe.as(:prefix) >> (phrase | term)).as(:clause) }
rule(:query) { (clause >> space.maybe).repeat.as(:query) } rule(:query) { (clause >> space.maybe).repeat.as(:query) }
root(:query) root(:query)
end end

View File

@ -1,17 +1,6 @@
# frozen_string_literal: true # frozen_string_literal: true
class SearchQueryTransformer < Parslet::Transform class SearchQueryTransformer < Parslet::Transform
SUPPORTED_PREFIXES = %w(
has
is
language
from
before
after
during
in
).freeze
class Query class Query
def initialize(clauses, options = {}) def initialize(clauses, options = {})
raise ArgumentError if options[:current_account].nil? raise ArgumentError if options[:current_account].nil?
@ -223,14 +212,12 @@ class SearchQueryTransformer < Parslet::Transform
end end
rule(clause: subtree(:clause)) do rule(clause: subtree(:clause)) do
prefix = clause[:prefix][:term].to_s if clause[:prefix] prefix = clause[:prefix][:prefix_operator].to_s if clause[:prefix]
operator = clause[:operator]&.to_s operator = clause[:operator]&.to_s
term = clause[:phrase] ? clause[:phrase].map { |term| term[:term].to_s }.join(' ') : clause[:term].to_s term = clause[:phrase] ? clause[:phrase].to_s : clause[:term].to_s
if clause[:prefix] && SUPPORTED_PREFIXES.include?(prefix) if clause[:prefix]
PrefixClause.new(prefix, operator, term, current_account: current_account) PrefixClause.new(prefix, operator, term, current_account: current_account)
elsif clause[:prefix]
TermClause.new(operator, "#{prefix} #{term}")
elsif clause[:term] elsif clause[:term]
TermClause.new(operator, term) TermClause.new(operator, term)
elsif clause[:phrase] elsif clause[:phrase]
@ -240,10 +227,6 @@ class SearchQueryTransformer < Parslet::Transform
end end
end end
rule(junk: subtree(:junk)) do
nil
end
rule(query: sequence(:clauses)) do rule(query: sequence(:clauses)) do
Query.new(clauses, current_account: current_account) Query.new(clauses, current_account: current_account)
end end

View File

@ -1,10 +1,8 @@
# frozen_string_literal: true # frozen_string_literal: true
class SearchService < BaseService class SearchService < BaseService
QUOTE_EQUIVALENT_CHARACTERS = /[“”„«»「」『』《》]/
def call(query, account, limit, options = {}) def call(query, account, limit, options = {})
@query = query&.strip&.gsub(QUOTE_EQUIVALENT_CHARACTERS, '"') @query = query&.strip
@account = account @account = account
@options = options @options = options
@limit = limit.to_i @limit = limit.to_i

View File

@ -10,11 +10,19 @@ describe SearchQueryParser do
it 'consumes "hello"' do it 'consumes "hello"' do
expect(parser.term).to parse('hello') expect(parser.term).to parse('hello')
end end
it 'consumes "foo:"' do
expect(parser.term).to parse('foo:')
end
it 'consumes ":foo:"' do
expect(parser.term).to parse(':foo:')
end
end end
context 'with prefix' do context 'with prefix' do
it 'consumes "foo:"' do it 'consumes "is:"' do
expect(parser.prefix).to parse('foo:') expect(parser.prefix).to parse('is:')
end end
end end
@ -28,16 +36,18 @@ describe SearchQueryParser do
end end
end end
context 'with shortcode' do
it 'consumes ":foo:"' do
expect(parser.shortcode).to parse(':foo:')
end
end
context 'with phrase' do context 'with phrase' do
it 'consumes "hello world"' do it 'consumes "hello world"' do
expect(parser.phrase).to parse('"hello world"') expect(parser.phrase).to parse('"hello world"')
end end
it 'consumes "hello “new” world"' do
expect(parser.phrase).to parse('"hello “new” world"')
end
it 'consumes “hello « hi » world”' do
expect(parser.phrase).to parse('“hello « hi » world”')
end
end end
context 'with clause' do context 'with clause' do
@ -57,14 +67,6 @@ describe SearchQueryParser do
expect(parser.clause).to parse('-foo:bar') expect(parser.clause).to parse('-foo:bar')
end end
it 'consumes \'foo:"hello world"\'' do
expect(parser.clause).to parse('foo:"hello world"')
end
it 'consumes \'-foo:"hello world"\'' do
expect(parser.clause).to parse('-foo:"hello world"')
end
it 'consumes "foo:"' do it 'consumes "foo:"' do
expect(parser.clause).to parse('foo:') expect(parser.clause).to parse('foo:')
end end
@ -94,5 +96,13 @@ describe SearchQueryParser do
it 'consumes "foo:bar bar: hello"' do it 'consumes "foo:bar bar: hello"' do
expect(parser.query).to parse('foo:bar bar: hello') expect(parser.query).to parse('foo:bar bar: hello')
end end
it 'consumes \'foo:"hello world"\'' do
expect(parser.query).to parse('foo:"hello world"')
end
it 'consumes \'-foo:"hello world"\'' do
expect(parser.query).to parse('-foo:"hello world"')
end
end end
end end

View File

@ -42,7 +42,7 @@ describe SearchQueryTransformer do
let(:query) { 'foo: bar' } let(:query) { 'foo: bar' }
it 'transforms clauses' do it 'transforms clauses' do
expect(subject.send(:must_clauses).map(&:term)).to match_array %w(foo bar) expect(subject.send(:must_clauses).map(&:term)).to match_array %w(foo: bar)
expect(subject.send(:must_not_clauses)).to be_empty expect(subject.send(:must_not_clauses)).to be_empty
expect(subject.send(:filter_clauses)).to be_empty expect(subject.send(:filter_clauses)).to be_empty
end end
@ -52,7 +52,7 @@ describe SearchQueryTransformer do
let(:query) { 'foo:bar' } let(:query) { 'foo:bar' }
it 'transforms clauses' do it 'transforms clauses' do
expect(subject.send(:must_clauses).map(&:term)).to contain_exactly('foo bar') expect(subject.send(:must_clauses).map(&:term)).to contain_exactly('foo:bar')
expect(subject.send(:must_not_clauses)).to be_empty expect(subject.send(:must_not_clauses)).to be_empty
expect(subject.send(:filter_clauses)).to be_empty expect(subject.send(:filter_clauses)).to be_empty
end end