[solr] 为什么我的solr配置了分词,还是不能搜索中文了,搜索英文的时候没有问题!

yanglaoshi5891 2009-04-05
问题如上:
schema.xml文件的配置如下
xml代码 <fieldtype name="text" class="solr.TextField">   
           <analyzer type="index"  class="org.apache.lucene.analysis.cjk.CJKAnalyzer">   
                        <tokenizer class="org.apache.lucene.analysis.cjk.CJKTokenizer"/>   
               <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>   
               <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>   
               <filter class="solr.LowerCaseFilterFactory"/>   
               <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>   
               <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>   
           </analyzer>   
            <analyzer type="query" class="org.apache.lucene.analysis.cjk.CJKAnalyzer">   
                        <tokenizer class="org.apache.lucene.analysis.cjk.CJKTokenizer"/>   
               <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>   
               <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>   
               <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>   
               <filter class="solr.LowerCaseFilterFactory"/>   
               <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>   
               <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>   
           </analyzer>   
       </fieldtype>   
   </types>   
   <fields>   
  
<field name="id" type="slong" indexed="true" stored="true"/>   
       <field name="url" type="text" indexed="true" stored="true"/>   
       <field name="title" type="text" indexed="true" stored="true"/>   
       <field name="pubtime" type="date" indexed="true" stored="true"/>   
              <field name="keyword" type="text" indexed="true" stored="true" multiValued="true"/>   
    <field name="content" type="text" indexed="true" stored="true" multiValued="true"/>   
    <field name="creator"  type="text" indexed="true" stored="true" multiValued="true"/>   
       <field name="all" type="text" indexed="true" stored="true" multiValued="true"/>   
  
       <dynamicField name="*_s" type="string" indexed="true" stored="true"/>   
   </fields>  

	<fieldtype name="text" class="solr.TextField">
            <analyzer type="index"  class="org.apache.lucene.analysis.cjk.CJKAnalyzer">
				  			<tokenizer class="org.apache.lucene.analysis.cjk.CJKTokenizer"/>
                <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
                <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
                <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
            </analyzer>
             <analyzer type="query" class="org.apache.lucene.analysis.cjk.CJKAnalyzer">
         				<tokenizer class="org.apache.lucene.analysis.cjk.CJKTokenizer"/>
                <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
                <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
                <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
                <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
            </analyzer>
        </fieldtype>
    </types>
    <fields>

	<field name="id" type="slong" indexed="true" stored="true"/>
        <field name="url" type="text" indexed="true" stored="true"/>
        <field name="title" type="text" indexed="true" stored="true"/>
        <field name="pubtime" type="date" indexed="true" stored="true"/>
               <field name="keyword" type="text" indexed="true" stored="true" multiValued="true"/>
		<field name="content" type="text" indexed="true" stored="true" multiValued="true"/>
		<field name="creator"  type="text" indexed="true" stored="true" multiValued="true"/>
        <field name="all" type="text" indexed="true" stored="true" multiValued="true"/>

        <dynamicField name="*_s" type="string" indexed="true" stored="true"/>
    </fields>

为免过长,只截取主要的部分,哪位solr高手帮我看看,到底问题出在什么地方,如果要更详细的说明或有什么我遗忘的地方,还望提醒,谢谢呀!!
imjl 2009-05-22
配置完,先用solr的管理界面的analyzer.jsp(好像是这个)看下你设置的分词是否起作用了

如果起作用了,那么保证传递信息是utf-8,方式也是

接收数据也是。

反正所有都是utf-8就是啦。


Global site tag (gtag.js) - Google Analytics