My fork of airsonic with experimental fixes and improvements. See branch "custom"
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

204 lines
8.3 KiB

/*
This file is part of Airsonic.
Airsonic is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Airsonic is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Airsonic. If not, see <http://www.gnu.org/licenses/>.
Copyright 2016 (C) Airsonic Authors
Based upon Subsonic, Copyright 2009 (C) Sindre Mehus
*/
package org.airsonic.player.service.search;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cjk.CJKWidthFilterFactory;
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
import org.apache.lucene.analysis.core.StopFilterFactory;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.custom.CustomAnalyzer.Builder;
import org.apache.lucene.analysis.en.EnglishPossessiveFilterFactory;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.pattern.PatternReplaceFilterFactory;
import org.apache.lucene.analysis.standard.StandardTokenizerFactory;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import static org.springframework.util.ObjectUtils.isEmpty;
/**
 * Provides the Lucene analyzers used by the search feature.
 * <p>
 * This class was split out of what was once part of SearchService and
 * extended with additional functionality. It supplies:
 * <ul>
 * <li>the analyzer used at index generation ({@link #getAnalyzer()}), and</li>
 * <li>the analyzer applied to a query at search time ({@link #getQueryAnalyzer()}).</li>
 * </ul>
 * An Analyzer can be closed, but these instances are meant to be reused:
 * each one is built on first request and then held by this class.
 * <p>
 * NOTE(review): the lazy initialization below is not synchronized, so
 * concurrent first calls may each build an instance - confirm whether
 * callers can reach the getters from several threads at once.
 */
@Component
public final class AnalyzerFactory {

    /* Stop-word resource applied to every field except the artist field. */
    private static final String STOP_WORDS = "org/airsonic/player/service/search/analysis/stopwords.txt";

    /* Stop-word resource applied to the artist field only. */
    private static final String STOP_WORDS_ARTIST = "org/airsonic/player/service/search/analysis/stopwords_artist.txt";

    /* Index-time analyzer; null until the first call to getAnalyzer(). */
    private Analyzer analyzer;

    /* Query-time analyzer; null until the first call to getQueryAnalyzer(). */
    private Analyzer queryAnalyzer;

    /*
     * XXX 3.x -> 8.x : Convert UAX#29 Underscore Analysis to Legacy Analysis
     *
     * Changes in how underscores before and after words are handled
     * have a major effect on the user's forward match search, so an
     * underscore at the very start and at the very end of each token
     * is removed here.
     *
     * @see AnalyzerFactoryTestCase
     */
    private void addTokenFilterForUnderscoreRemovalAroundToken(Builder builder) throws IOException {
        builder
                // Strip an underscore at the beginning of the token.
                .addTokenFilter(PatternReplaceFilterFactory.class,
                        "pattern", "^\\_", "replacement", "", "replace", "all")
                // Strip an underscore at the end of the token.
                .addTokenFilter(PatternReplaceFilterFactory.class,
                        "pattern", "\\_$", "replacement", "", "replace", "all");
    }

    /*
     * XXX 3.x -> 8.x : Handle brackets correctly
     *
     * Converts the input value of a Genre search into the domain value.
     *
     * The tag parser performs special character conversion
     * when converting input values from a file.
     * Therefore, the domain value may be different from the original value.
     * This filter allows searching by user readable value (file tag value).
     *
     * @see org.jaudiotagger.tag.id3.framebody.FrameBodyTCON#convertID3v23GenreToGeneric
     * (TCON stands for Genre with ID3 v2.3-v2.4)
     * Such processing exists because brackets in the Genre string have a special meaning.
     *
     * The filters run in the order they are added, so the order matters:
     * a trailing ")" must be dropped before the remaining ")" become spaces.
     */
    private void addTokenFilterForTokenToDomainValue(Builder builder) throws IOException {
        builder
                // Drop every opening parenthesis.
                .addTokenFilter(PatternReplaceFilterFactory.class,
                        "pattern", "\\(", "replacement", "", "replace", "all")
                // Drop a closing parenthesis at the end of the value.
                .addTokenFilter(PatternReplaceFilterFactory.class,
                        "pattern", "\\)$", "replacement", "", "replace", "all")
                // Any closing parenthesis still left becomes a space.
                .addTokenFilter(PatternReplaceFilterFactory.class,
                        "pattern", "\\)", "replacement", " ", "replace", "all")
                // An empty pair of braces gets a space inserted between them.
                .addTokenFilter(PatternReplaceFilterFactory.class,
                        "pattern", "\\{\\}", "replacement", "\\{ \\}", "replace", "all")
                // An empty pair of square brackets gets a space inserted between them.
                .addTokenFilter(PatternReplaceFilterFactory.class,
                        "pattern", "\\[\\]", "replacement", "\\[ \\]", "replace", "all");
    }

    /*
     * Builds the tokenizer/filter chain shared by the default and the artist
     * analyzers. Both use exactly the same steps and differ only in the
     * stop-word resource, so the chain is defined once here to keep them
     * from drifting apart.
     *
     * Chain: standard tokenizer, CJK width filter, ASCII folding (without
     * preserving the original token), lower-casing, stop-word removal,
     * English possessive removal, then underscore stripping around the token.
     *
     * stopWords is passed as the "words" argument of StopFilterFactory
     * (presumably resolved as a classpath resource - verify against the
     * resource loader used by CustomAnalyzer.builder()).
     */
    private Builder createStandardAnalyzerBuilder(String stopWords) throws IOException {
        Builder builder = CustomAnalyzer.builder()
                .withTokenizer(StandardTokenizerFactory.class)
                .addTokenFilter(CJKWidthFilterFactory.class)
                .addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "false")
                .addTokenFilter(LowerCaseFilterFactory.class)
                .addTokenFilter(StopFilterFactory.class, "words", stopWords)
                .addTokenFilter(EnglishPossessiveFilterFactory.class);
        addTokenFilterForUnderscoreRemovalAroundToken(builder);
        return builder;
    }

    /* Builder for the analyzer applied to fields without a dedicated analyzer. */
    private Builder createDefaultAnalyzerBuilder() throws IOException {
        return createStandardAnalyzerBuilder(STOP_WORDS);
    }

    /* Builder for the artist field; same chain as the default, artist stop words. */
    private Builder createArtistAnalyzerBuilder() throws IOException {
        return createStandardAnalyzerBuilder(STOP_WORDS_ARTIST);
    }

    /* Builder that uses the keyword tokenizer and adds no filters. */
    private Builder createKeywordAnalyzerBuilder() throws IOException {
        return CustomAnalyzer.builder()
                .withTokenizer(KeywordTokenizerFactory.class);
    }

    /* Builder for genre queries: keyword tokenizer plus the bracket conversion filters. */
    private Builder createGenreAnalyzerBuilder() throws IOException {
        Builder builder = createKeywordAnalyzerBuilder();
        addTokenFilterForTokenToDomainValue(builder);
        return builder;
    }

    /**
     * Returns the Analyzer to use when generating the index.
     * <p>
     * The artist field gets its own analyzer; every other field falls back
     * to the default analyzer. Whether this analyzer is applied to input
     * values at all depends on the definition of the document's fields.
     * <p>
     * The instance is built on the first call and returned unchanged afterwards.
     *
     * @return analyzer for index
     * @throws IOException if one of the underlying analyzers cannot be built;
     *         the original exception is attached as the cause
     * @see DocumentFactory
     */
    public Analyzer getAnalyzer() throws IOException {
        // For an Analyzer reference, isEmpty(...) acts as a plain null check.
        if (isEmpty(analyzer)) {
            try {
                Analyzer defaultAnalyzer = createDefaultAnalyzerBuilder().build();
                Analyzer artistAnalyzer = createArtistAnalyzerBuilder().build();

                Map<String, Analyzer> fieldAnalyzers = new HashMap<>();
                fieldAnalyzers.put(FieldNames.ARTIST, artistAnalyzer);

                analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer, fieldAnalyzers);
            } catch (IOException e) {
                throw new IOException("Error when initializing Analyzer.", e);
            }
        }
        return analyzer;
    }

    /**
     * Returns the analyzer to use when generating a query for index search.
     * <p>
     * String processing handled by QueryFactory is limited to Lucene's
     * modifiers. The processing of the operands is expressed in the
     * AnalyzerFactory implementation: rules for tokenizing/converting
     * input values should not be described in QueryFactory.
     * <p>
     * Compared with {@link #getAnalyzer()}, this analyzer additionally
     * registers a dedicated analyzer for the genre field. The instance is
     * built on the first call and returned unchanged afterwards.
     *
     * @return analyzer for query
     * @throws IOException if one of the underlying analyzers cannot be built;
     *         the original exception is attached as the cause
     * @see QueryFactory
     */
    public Analyzer getQueryAnalyzer() throws IOException {
        // For an Analyzer reference, isEmpty(...) acts as a plain null check.
        if (isEmpty(queryAnalyzer)) {
            try {
                Analyzer defaultAnalyzer = createDefaultAnalyzerBuilder().build();
                Analyzer artistAnalyzer = createArtistAnalyzerBuilder().build();
                Analyzer genreAnalyzer = createGenreAnalyzerBuilder().build();

                Map<String, Analyzer> fieldAnalyzers = new HashMap<>();
                fieldAnalyzers.put(FieldNames.ARTIST, artistAnalyzer);
                fieldAnalyzers.put(FieldNames.GENRE, genreAnalyzer);

                queryAnalyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer, fieldAnalyzers);
            } catch (IOException e) {
                throw new IOException("Error when initializing Analyzer.", e);
            }
        }
        return queryAnalyzer;
    }
}