[lucene] lucene查询时总是返回第一个Doc

bruceWoo 2010-04-30
最近做一个项目时需要频繁进行数据库 like 查询,所以想到用 Lucene 来优化。我在网上找了些入门资料和一个简单的测试程序,发现只要能查询出结果,返回的却总是第一个添加的 Doc。比如下面的测试代码:第一个加入的 Doc 的 name 域的值为“擎天柱”,第二个加入的 Doc 的 name 域的值为“搜索引擎”,搜索关键字为“搜索 AND 擎”时,返回的却是第一个 Doc,这是怎么回事呢?主要代码如下,请各位帮忙分析,谢谢。
--------------------------------------------------------------------------
public class TestQueryParser1 {  
  
    public static void main(String[] args) throws Exception {  
        String path = "E:/AppLog/bnq/index";  
        TestQueryParser1 tqp = new TestQueryParser1();  
        tqp.createIndex(path);  
        Searcher search = tqp.getSearcher(path);   
        System.out.println("#_3");  
        tqp.testTime(search, tqp.getQueryParser3());  
      
    }  
      
    public void testTime(Searcher search,Query query) throws IOException{  
        long start = new Date().getTime();
       
        TopDocs topDocs = search.search(query,GlobalVariables.MAX_QRY_RET_NUM);
       
        for (int i = 0; i < topDocs.totalHits; i++) {

            //ScoreDoc doc = topDocs.scoreDocs[i];
            Document document=search.doc(i);
            String content=document.get(BnqDTO.LNAME);
           
            System.out.println("匹配记录["+(i+1)+"]="+content);
          

        }
        System.out.println("本次搜索用时:" + ((new Date()).getTime() - start) + "毫秒");  
          
    }  
      
    public Searcher getSearcher(String path) throws CorruptIndexException, IOException{  
        Directory indexDir=FSDirectory.open(new File(path));
        IndexSearcher idxSearcher = new IndexSearcher(indexDir,true);
        return idxSearcher;
        //return new IndexSearcher(path);  
    }  
  
      
    public Query getQueryParser3(){  
        QueryParser queryParser=new QueryParser(Version.LUCENE_29, "name",new ChineseAnalyzer());
        //QueryParser queryParser = new QueryParser(BnqDTO.LNAME, new StandardAnalyzer());  
        try {  
            BooleanQuery query=new BooleanQuery();
            query.add(new TermQuery(new Term(BnqDTO.LNAME,"搜索")),BooleanClause.Occur.MUST);
            query.add(new TermQuery(new Term(BnqDTO.LNAME,"擎")),BooleanClause.Occur.MUST);
           
            //return query;
            return queryParser.parse("搜索 AND 擎");  
        } catch (Exception e) {  
            e.printStackTrace();  
        }  
        return null;  
    }  
      
   
      
    public void createIndex(String path){  
        try {  
            //FileUtil.clearDir(path);
           
            Analyzer analyzer=new ChineseAnalyzer();
           
            Directory indexDir=FSDirectory.open(new File(path));
           
            IndexWriter writer = new IndexWriter(indexDir,analyzer,IndexWriter.MaxFieldLength.UNLIMITED);
           
          
            //original
           
            //IndexWriter writer = new IndexWriter(path,new StandardAnalyzer(),true);  
            Document docA = new Document();  
            //相当于数据库中列的概念,因此第一个参数是列名,第二个参数是列的值,最后两个参数是enum类型的(JDK1.5),对创建的索引的设置  
            //Field.Store 是否覆盖原来的索引文件,而不是重新建一个  
            Field fieldA = new Field("name","搜索引擎",Field.Store.YES,Field.Index.ANALYZED);  
            //我们把列(fieldA)加到某一行(docA)中  
            docA.add(fieldA);  
              
            docA.add(new Field("title","你好中国",Field.Store.YES,Field.Index.NOT_ANALYZED));  
            docA.add(new Field("lastModifyTime","2008-9-17",Field.Store.YES,Field.Index.NOT_ANALYZED));  
            docA.add(new Field("testCapital","HelloWangzi",Field.Store.YES,Field.Index.NOT_ANALYZED));  
            docA.add(new Field("testAndOr","test and",Field.Store.YES,Field.Index.NOT_ANALYZED));  
              
            Document docB = new Document();  
            //相当于数据库中列的概念,因此第一个参数是列名,第二个参数是列的值,最后两个参数是enum类型的(JDK1.5),对创建的索引的设置  
            Field fieldB = new Field("name","擎天柱",Field.Store.YES,Field.Index.ANALYZED);  
            //我们把列(fieldA)加到某一行(docA)中  
            docB.add(fieldB);  
            docB.add(new Field("title","你好世界",Field.Store.YES,Field.Index.NOT_ANALYZED));    
            docB.add(new Field("lastModifyTime","2008-9-6",Field.Store.YES,Field.Index.NOT_ANALYZED));  
            docB.add(new Field("testCapital","hellowangZi",Field.Store.YES,Field.Index.NOT_ANALYZED));  
            docB.add(new Field("testAndOr","test or",Field.Store.YES,Field.Index.NOT_ANALYZED));  
            
            Document docC = new Document();  
            //相当于数据库中列的概念,因此第一个参数是列名,第二个参数是列的值,最后两个参数是enum类型的(JDK1.5),对创建的索引的设置  
            Field fieldC = new Field("name","CCCAAABBB",Field.Store.YES,Field.Index.ANALYZED);  
            //我们把列(fieldA)加到某一行(docA)中  
            docB.add(fieldC);  
            docB.add(new Field("title","你好世界",Field.Store.YES,Field.Index.NOT_ANALYZED));    
            docB.add(new Field("lastModifyTime","2008-9-6",Field.Store.YES,Field.Index.NOT_ANALYZED));  
            docB.add(new Field("testCapital","hellowangZi",Field.Store.YES,Field.Index.NOT_ANALYZED));  
            docB.add(new Field("testAndOr","test or",Field.Store.YES,Field.Index.NOT_ANALYZED));  
          
            writer.addDocument(docB);  
            writer.addDocument(docA);
            writer.addDocument(docC);
            //如果对海量数据进行创建索引的时候,需要对索引进行优化,以便提高速度  
            writer.optimize();  
              
            //跟数据库类似,打开一个连接,使用完后,要关闭它  
            writer.close();  
        } catch (CorruptIndexException e) {  
            e.printStackTrace();  
        } catch (LockObtainFailedException e) {  
            e.printStackTrace();  
        } catch (IOException e) {  
            e.printStackTrace();  
        }  
    }  
  
}
bruceWoo 2010-05-07
大家帮忙测试这个程序看看我的问题是否会重现,程序中使用的一些常量可以自己定义,谢谢大家
xanpeng 2010-05-08
bruceWoo 写道
大家帮忙测试这个程序看看我的问题是否会重现,程序中使用的一些常量可以自己定义,谢谢大家

提供下 Analyzer 的 jar 包吧, GlobalVariables, BnqDTO, ChineseAnalyzer 都无法识别
Global site tag (gtag.js) - Google Analytics