关于中文搜索时使用*通配符不起作用的问题

cjx186 2008-04-24
本人想实现一个智能提示功能,索引文件已经建立。查询时发现一个问题:查英文时使用通配符*或者?是可以的;查中文时加?查不到,加*查得到,但结果不符合通配符规则,而是模糊查询,不加通配符时也是模糊查询。
package com.aladdin.alagis.suggest;

/**
 * Smart suggestion (search-box autocomplete) servlet.
 *
 * @author chenjianxiang
 * @since 2008-4-23
 */
import java.io.IOException;
import java.io.PrintWriter;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

import com.aladdin.alagis.util.ConfigFilePath;
import com.aladdin.util.Escape;

/**
 * Servlet that answers search-box suggestion requests.
 *
 * <p>It reads the request parameter {@code key}, runs the prefix wildcard
 * query {@code key*} against the {@code word} field of the Lucene index
 * located by {@code ConfigFilePath.getConfigPath("index")}, and writes a
 * {@code UBC('...')} call whose single-quoted argument is an HTML fragment
 * listing at most {@link #MAX_SUGGESTIONS} matching words.
 *
 * <p>NOTE(review): {@code UBC} is presumably a JavaScript callback defined by
 * the calling page; it is not visible from this file.
 *
 * <p>NOTE(review): the query parser does not run wildcard terms through the
 * analyzer, so whether {@code key*} behaves as a true prefix match depends on
 * how the {@code word} field was tokenized at index time, which is not visible
 * here. If the field was indexed with {@code StandardAnalyzer}, CJK text is
 * presumably stored one character per term; confirm against the indexing code.
 */
public class Suggest extends javax.servlet.http.HttpServlet implements
		javax.servlet.Servlet {
	static final long serialVersionUID = 1L;

	private static final java.util.logging.Logger LOG =
			java.util.logging.Logger.getLogger(Suggest.class.getName());

	/** Upper bound on the number of suggestion rows in one response. */
	private static final int MAX_SUGGESTIONS = 15;

	/** Placeholder in the templates below that is replaced by the escaped key. */
	private static final String KEY_PLACEHOLDER = "{key}";

	/** Opening part of a response that contains at least one suggestion row. */
	private static final String TEMPLATE_START = "UBC('<DIV id=\"{key}\" style=\"DISPLAY: none\"></DIV><DIV style=\"Z-INDEX: 12\"><TABLE cellSpacing=\"1\" cellPadding=\"0\" width=\"100%\" align=\"center\" bgColor=\"#979797\" border=\"0\"><TBODY><TR><TD vAlign=\"top\"><TABLE cellSpacing=\"0\" cellPadding=\"0\" width=\"100%\" align=\"center\" border=\"0\"><TBODY>";

	/** Closing part of a response that contains at least one suggestion row. */
	private static final String TEMPLATE_END = "</TBODY></TABLE><TABLE cellSpacing=\"0\" cellPadding=\"0\" width=\"100%\" align=\"center\" border=\"0\" onSelect=\"this.txtBox.value=\\'\\'\"><TBODY><TR><TD bgColor=\"#dddddd\" colSpan=\"2\" height=\"1\"></TD></TR><TR><TD class=\"jstxhuitiaoyou\" align=\"right\" bgColor=\"#ecf0ef\" height=\"17\"><A class=\"jstxlan\" onclick=\"TurnOffSuggest();\">关闭提示功能</A> </TD></TR></TBODY></TABLE></TD></TR></TBODY></TABLE></DIV>')";

	/** Complete response used when there is nothing to suggest. */
	private static final String TEMPLATE_NOT_FOUND = "UBC('<DIV id=\"{key}\" style=\"DISPLAY: none\"></DIV>')";

	// A suggestion row is assembled from fixed pieces instead of a placeholder
	// template, so text coming from the index can never be mistaken for a
	// placeholder and each insertion point gets the escaping its context needs.
	private static final String ROW_OPEN = "<TR svalue=\"";
	private static final String ROW_ON_SELECT = "\" onSelect=\"this.txtBox.value=\\'";
	private static final String ROW_CELL = "\\'\"><TD class=\"remindtt75\" align=\"left\" bgColor=\"#ffffff\">";
	private static final String ROW_CLOSE = "</TD><TD class=\"remindtt752\" align=\"right\" bgColor=\"#ffffff\"></TD></TR>";

	public Suggest() {
		super();
	}

	/** Handles GET exactly like POST. */
	protected void doGet(HttpServletRequest request,
			HttpServletResponse response) throws ServletException, IOException {
		this.doPost(request, response);
	}

	/**
	 * Writes the suggestion response for the {@code key} request parameter.
	 *
	 * <p>A missing, blank or unparsable key yields the "nothing found"
	 * response instead of an error.
	 *
	 * @param request  request carrying the (escaped) {@code key} parameter
	 * @param response response the {@code UBC('...')} call is written to
	 * @throws IOException if the index cannot be read or the response cannot be written
	 */
	protected void doPost(HttpServletRequest request,
			HttpServletResponse response) throws ServletException, IOException {
		String rawKey = request.getParameter("key");
		String key = (rawKey == null) ? null : Escape.unescape(rawKey);
		if (key == null) {
			key = "";
		}

		response.setCharacterEncoding("UTF-8");
		response.setContentType("text/html; charset=utf-8");
		PrintWriter out = response.getWriter();

		out.print(buildResponse(key));
		out.flush();
	}

	/**
	 * Builds the complete response text for one key.
	 *
	 * @param key unescaped, non-null search key
	 * @return the {@code UBC('...')} call to send to the client
	 * @throws IOException if the index cannot be opened, searched or closed
	 */
	private String buildResponse(String key) throws IOException {
		String safeKey = escapeMarkup(key);
		// Literal replace: the key must not be interpreted as a regex replacement.
		String notFound = TEMPLATE_NOT_FOUND.replace(KEY_PLACEHOLDER, safeKey);

		// A blank key would turn into the bare query "*"; there is nothing to suggest.
		if (key.trim().length() == 0) {
			return notFound;
		}

		Query query;
		try {
			query = new QueryParser("word", new StandardAnalyzer()).parse(key + "*");
		} catch (ParseException e) {
			// User input that is not valid query syntax is an expected condition.
			LOG.log(java.util.logging.Level.FINE, "Suggestion key is not a valid query", e);
			return notFound;
		}

		// NOTE(review): a searcher is opened per request, as before; sharing one
		// instance would be cheaper but needs lifecycle handling outside this method.
		IndexSearcher searcher = new IndexSearcher(ConfigFilePath.getConfigPath("index"));
		try {
			Hits hits = searcher.search(query);
			int count = Math.min(hits.length(), MAX_SUGGESTIONS);

			StringBuffer rows = new StringBuffer();
			for (int i = 0; i < count; i++) {
				String word = hits.doc(i).get("word");
				if (word != null) { // field may be absent or not stored
					appendRow(rows, word);
				}
			}
			if (rows.length() == 0) {
				return notFound;
			}
			return TEMPLATE_START.replace(KEY_PLACEHOLDER, safeKey) + rows + TEMPLATE_END;
		} finally {
			// Documents must be read before this point; Hits loads them lazily.
			searcher.close();
		}
	}

	/** Appends one suggestion row for {@code word}, escaping it for each context it is placed in. */
	private static void appendRow(StringBuffer rows, String word) {
		String asMarkup = escapeMarkup(word);
		// Inside onSelect the word sits in a script string literal that is itself
		// inside an HTML attribute, so it is script-escaped first, then markup-escaped.
		String asScript = escapeMarkup(escapeScriptLiteral(word));
		rows.append(ROW_OPEN).append(asMarkup)
				.append(ROW_ON_SELECT).append(asScript)
				.append(ROW_CELL).append(asMarkup)
				.append(ROW_CLOSE);
	}

	/**
	 * Escapes text for HTML that is embedded in a single-quoted JavaScript
	 * string literal. Besides the usual HTML metacharacters, the quote,
	 * backslash and line terminators are written as character references so
	 * they cannot end or alter the surrounding script literal.
	 */
	private static String escapeMarkup(String text) {
		StringBuffer out = new StringBuffer(text.length() + 16);
		for (int i = 0; i < text.length(); i++) {
			char c = text.charAt(i);
			switch (c) {
			case '&':
				out.append("&amp;");
				break;
			case '<':
				out.append("&lt;");
				break;
			case '>':
				out.append("&gt;");
				break;
			case '"':
				out.append("&quot;");
				break;
			case '\'':
				out.append("&#39;");
				break;
			case '\\':
				out.append("&#92;");
				break;
			case '\r':
				out.append("&#13;");
				break;
			case '\n':
				out.append("&#10;");
				break;
			case '\u2028':
				out.append("&#8232;");
				break;
			case '\u2029':
				out.append("&#8233;");
				break;
			default:
				out.append(c);
			}
		}
		return out.toString();
	}

	/** Escapes text for use inside a single-quoted JavaScript string literal. */
	private static String escapeScriptLiteral(String text) {
		StringBuffer out = new StringBuffer(text.length() + 16);
		for (int i = 0; i < text.length(); i++) {
			char c = text.charAt(i);
			switch (c) {
			case '\\':
				out.append("\\\\");
				break;
			case '\'':
				out.append("\\'");
				break;
			case '\r':
				out.append("\\r");
				break;
			case '\n':
				out.append("\\n");
				break;
			case '\u2028':
				out.append("\\u2028");
				break;
			case '\u2029':
				out.append("\\u2029");
				break;
			default:
				out.append(c);
			}
		}
		return out.toString();
	}
}
licco1 2008-05-04
这个得看分词器的语法了。你用的是StandardAnalyzer,我在《Lucene in Action》里看到,标准分词器分析非字母文字的效果是用terrible来形容的。可以看看里面那个JavaCC语法文件。
p_x1984 2009-07-16
StandardAnalyzer这个分词器会把每一个汉字单独切分成一个词,所以使用通配符查询中文时不起作用。可以试试其它的分词器,比如paoding、IK等。
Global site tag (gtag.js) - Google Analytics