/*
 This file is part of Airsonic.

 Airsonic is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 Airsonic is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with Airsonic.  If not, see <http://www.gnu.org/licenses/>.

 Copyright 2016 (C) Airsonic Authors
 Based upon Subsonic, Copyright 2009 (C) Sindre Mehus
 */

package org.airsonic.player.service.search;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cjk.CJKWidthFilterFactory;
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
import org.apache.lucene.analysis.core.StopFilterFactory;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.custom.CustomAnalyzer.Builder;
import org.apache.lucene.analysis.en.EnglishPossessiveFilterFactory;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.pattern.PatternReplaceFilterFactory;
import org.apache.lucene.analysis.standard.StandardTokenizerFactory;
import org.springframework.stereotype.Component;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import static org.springframework.util.ObjectUtils.isEmpty;

/**
 * Analyzer provider.
 * This class is a division of what was once part of SearchService and added functionality.
 * This class provides the Analyzer which is used at index generation
 * and the query Analyzer which analyzes the specified query at search time.
 * An Analyzer can be closed, but it is meant to be reused,
 * so each instance is created once (lazily) and held in this class.
 */
@Component
public final class AnalyzerFactory {

    private static final String STOP_WORDS = "org/airsonic/player/service/search/analysis/stopwords.txt";

    private static final String STOP_WORDS_ARTIST = "org/airsonic/player/service/search/analysis/stopwords_artist.txt";

    // Lazily created by getAnalyzer() / getQueryAnalyzer() and then reused.
    // NOTE(review): the lazy initialization below is not synchronized; confirm
    // whether concurrent first calls are possible and whether that matters.
    private Analyzer analyzer;

    private Analyzer queryAnalyzer;

    /*
     * XXX 3.x -> 8.x : Convert UAX#29 Underscore Analysis to Legacy Analysis
     *
     * Because changes in underscores before and after words
     * have a major effect on user's forward match search.
     *
     * Strips a single underscore at the start and at the end of each token.
     *
     * @see AnalyzerFactoryTestCase
     */
    private void addTokenFilterForUnderscoreRemovalAroundToken(Builder builder) throws IOException {
        builder
                .addTokenFilter(PatternReplaceFilterFactory.class,
                        "pattern", "^\\_", "replacement", "", "replace", "all")
                .addTokenFilter(PatternReplaceFilterFactory.class,
                        "pattern", "\\_$", "replacement", "", "replace", "all");
    }

    /*
     * XXX 3.x -> 8.x : Handle brackets correctly
     *
     * Process the input value of Genre search for search of domain value.
     *
     * The tag parser performs special character conversion
     * when converting input values from a file.
     * Therefore, the domain value may be different from the original value.
     * This filter allows searching by user readable value (file tag value).
     *
     * @see org.jaudiotagger.tag.id3.framebody.FrameBodyTCON#convertID3v23GenreToGeneric
     * (TCON stands for Genre with ID3 v2.3-v2.4)
     * Such processing exists because brackets in the Genre string have a special meaning.
     */
    private void addTokenFilterForTokenToDomainValue(Builder builder) throws IOException {
        // The filters run in the order they are added, and the order matters:
        // a trailing ")" has to be dropped before the remaining ")" are turned into spaces.
        builder
                // Remove every opening parenthesis.
                .addTokenFilter(PatternReplaceFilterFactory.class,
                        "pattern", "\\(", "replacement", "", "replace", "all")
                // Remove a closing parenthesis at the very end of the value.
                .addTokenFilter(PatternReplaceFilterFactory.class,
                        "pattern", "\\)$", "replacement", "", "replace", "all")
                // Any closing parenthesis still left becomes a single space.
                .addTokenFilter(PatternReplaceFilterFactory.class,
                        "pattern", "\\)", "replacement", " ", "replace", "all")
                // An empty pair of braces "{}" gets a space inserted between the braces.
                .addTokenFilter(PatternReplaceFilterFactory.class,
                        "pattern", "\\{\\}", "replacement", "\\{ \\}", "replace", "all")
                // An empty pair of square brackets "[]" gets a space inserted between the brackets.
                .addTokenFilter(PatternReplaceFilterFactory.class,
                        "pattern", "\\[\\]", "replacement", "\\[ \\]", "replace", "all");
    }

    /*
     * Shared recipe for the word-tokenizing analyzers (default and artist).
     * The two differ only in the stop-word resource, so the filter chain
     * is defined once here to keep them from drifting apart.
     *
     * Chain: standard tokenizer -> CJK width -> ASCII folding (original not preserved)
     * -> lower case -> stop words -> English possessive -> underscore removal.
     */
    private Builder createStopWordAnalyzerBuilder(String stopWordsResource) throws IOException {
        Builder builder = CustomAnalyzer.builder()
                .withTokenizer(StandardTokenizerFactory.class)
                .addTokenFilter(CJKWidthFilterFactory.class)
                .addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "false")
                .addTokenFilter(LowerCaseFilterFactory.class)
                .addTokenFilter(StopFilterFactory.class, "words", stopWordsResource)
                .addTokenFilter(EnglishPossessiveFilterFactory.class);
        addTokenFilterForUnderscoreRemovalAroundToken(builder);
        return builder;
    }

    /* Builder for the analyzer applied to every field without a dedicated analyzer. */
    private Builder createDefaultAnalyzerBuilder() throws IOException {
        return createStopWordAnalyzerBuilder(STOP_WORDS);
    }

    /* Builder for the artist field; same chain as the default, with the artist stop-word list. */
    private Builder createArtistAnalyzerBuilder() throws IOException {
        return createStopWordAnalyzerBuilder(STOP_WORDS_ARTIST);
    }

    /* Builder that uses the keyword tokenizer and no token filters. */
    private Builder createKeywordAnalyzerBuilder() throws IOException {
        return CustomAnalyzer.builder()
                .withTokenizer(KeywordTokenizerFactory.class);
    }

    /* Builder for genre queries: keyword tokenizer plus the bracket handling filters. */
    private Builder createGenreAnalyzerBuilder() throws IOException {
        Builder builder = createKeywordAnalyzerBuilder();
        addTokenFilterForTokenToDomainValue(builder);
        return builder;
    }

    /**
     * Returns the Analyzer to use when generating the index.
     *
     * Whether this analyzer is applied to input values depends on
     * the definition of the document's fields.
     *
     * The analyzer is built on the first call and the same instance
     * is returned by later calls.
     *
     * @return analyzer for index
     * @throws IOException if the analyzer could not be built;
     *         the underlying exception is attached as the cause
     * @see DocumentFactory
     */
    public Analyzer getAnalyzer() throws IOException {
        // For a plain object reference isEmpty(...) amounts to a null check.
        if (isEmpty(analyzer)) {
            try {
                Analyzer defaultAnalyzer = createDefaultAnalyzerBuilder().build();
                Analyzer artistAnalyzer = createArtistAnalyzerBuilder().build();

                Map<String, Analyzer> fieldAnalyzers = new HashMap<>();
                fieldAnalyzers.put(FieldNames.ARTIST, artistAnalyzer);

                analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer, fieldAnalyzers);
            } catch (IOException e) {
                throw new IOException("Error when initializing Analyzer.", e);
            }
        }
        return analyzer;
    }

    /**
     * Returns the analyzer to use when generating a query for index search.
     *
     * String processing handled by QueryFactory
     * is limited to Lucene's modifier.
     *
     * The processing of the operands is expressed
     * in the AnalyzerFactory implementation.
     * Rules for tokenizing/converting input values
     * should not be described in QueryFactory.
     *
     * Unlike the index analyzer, this one also registers
     * a dedicated analyzer for the genre field.
     * The analyzer is built on the first call and the same instance
     * is returned by later calls.
     *
     * @return analyzer for query
     * @throws IOException if the analyzer could not be built;
     *         the underlying exception is attached as the cause
     * @see QueryFactory
     */
    public Analyzer getQueryAnalyzer() throws IOException {
        // For a plain object reference isEmpty(...) amounts to a null check.
        if (isEmpty(queryAnalyzer)) {
            try {
                Analyzer defaultAnalyzer = createDefaultAnalyzerBuilder().build();
                Analyzer artistAnalyzer = createArtistAnalyzerBuilder().build();
                Analyzer genreAnalyzer = createGenreAnalyzerBuilder().build();

                Map<String, Analyzer> fieldAnalyzers = new HashMap<>();
                fieldAnalyzers.put(FieldNames.ARTIST, artistAnalyzer);
                fieldAnalyzers.put(FieldNames.GENRE, genreAnalyzer);

                queryAnalyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer, fieldAnalyzers);
            } catch (IOException e) {
                throw new IOException("Error when initializing Analyzer.", e);
            }
        }
        return queryAnalyzer;
    }

}