lucene.net 3.0.3、结合盘古分词进行搜索的小例子（分页功能）

icecity1306 2016-05-21
展开全文
添加：2013-12-25
更新：2013-12-26 新增分页功能。
更新：2013-12-27 新增按分类查询功能；调整索引行结构，新增用于记录图片路径的字段。
最新盘古分词dll和词典管理工具下载：http://pangusegment.codeplex.com/
词典下载：http://pangusegment.codeplex.com/releases/view/47411
//封装类
[csharp] view plain copy print?
using System;  
using System.Collections.Generic;  
using System.Linq;  
using System.Web;  
using Lucene.Net.Analysis;  
using Lucene.Net.Index;  
using Lucene.Net.Documents;  
using System.Reflection;  
using Lucene.Net.QueryParsers;  
using Lucene.Net.Search;  
namespace SearchTest  
{  
    /// <summary>  
    /// 盘古分词在lucene.net中的使用帮助类  
    /// 调用PanGuLuceneHelper.instance  
    /// </summary>  
    public class PanGuLuceneHelper  
    {  
        private PanGuLuceneHelper() { }  
 
        #region 单一实例  
        private static PanGuLuceneHelper _instance = null;  
        /// <summary>  
        /// 单一实例  
        /// </summary>  
        public static PanGuLuceneHelper instance  
        {  
            get  
            {  
                if (_instance == null) _instance = new PanGuLuceneHelper();  
                return _instance;  
            }  
        }  
        #endregion  
 
        #region 分词测试  
        /// <summary>  
        /// 分词测试  
        /// </summary>  
        /// <param name="keyword"></param>  
        /// <returns></returns>  
        public string Token(string keyword)  
        {  
            string ret = "";  
            System.IO.StringReader reader = new System.IO.StringReader(keyword);  
            Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(keyword, reader);  
            bool hasNext = ts.IncrementToken();  
            Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita;  
            while (hasNext)  
            {  
                ita = ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();  
                ret += ita.Term + "|";  
                hasNext = ts.IncrementToken();  
            }  
            ts.CloneAttributes();  
            reader.Close();  
            analyzer.Close();  
            return ret;  
        }  
        #endregion  
 
        #region 创建索引  
        /// <summary>  
        /// 创建索引  
        /// </summary>  
        /// <param name="datalist"></param>  
        /// <returns></returns>  
        public bool CreateIndex(List<MySearchUnit> datalist)  
        {  
            IndexWriter writer = null;  
            try  
            {  
                writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);//false表示追加（true表示删除之前的重新写入）  
            }  
            catch  
            {  
                writer = new IndexWriter(directory_luce, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);//false表示追加（true表示删除之前的重新写入）  
            }  
            foreach (MySearchUnit data in datalist)  
            {  
                CreateIndex(writer, data);  
            }  
            writer.Optimize();  
            writer.Dispose();  
            return true;  
        }  
  
        public bool CreateIndex(IndexWriter writer, MySearchUnit data)  
        {  
            try  
            {  
  
                if (data == null) return false;  
                Document doc = new Document();  
                Type type = data.GetType();//assembly.GetType("Reflect_test.PurchaseOrderHeadManageModel", true, true); //命名空间名称 + 类名      
  
                //创建类的实例      
                //object obj = Activator.CreateInstance(type, true);    
                //获取公共属性      
                PropertyInfo[] Propertys = type.GetProperties();  
                for (int i = 0; i < Propertys.Length; i++)  
                {  
                    //Propertys[i].SetValue(Propertys[i], i, null); //设置值  
                    PropertyInfo pi = Propertys[i];  
                    string name=pi.Name;  
                    object objval = pi.GetValue(data, null);  
                    string value = objval == null ? "" : objval.ToString(); //值  
                    if (name == "id" || name=="flag" )//id在写入索引时必是不分词，否则是模糊搜索和删除，会出现混乱  
                    {  
                        doc.Add(new Field(name, value, Field.Store.YES, Field.Index.NOT_ANALYZED));//id不分词  
                    }  
                    else  
                    {  
                        doc.Add(new Field(name, value, Field.Store.YES, Field.Index.ANALYZED));  
                    }  
                }  
                writer.AddDocument(doc);  
            }  
            catch (System.IO.FileNotFoundException fnfe)  
            {  
                throw fnfe;  
            }  
            return true;  
        }  
        #endregion  
 
        #region 在title和content字段中查询数据  
        /// <summary>  
        /// 在title和content字段中查询数据  
        /// </summary>  
        /// <param name="keyword"></param>  
        /// <returns></returns>  
        public List<MySearchUnit> Search(string keyword)  
        {  
  
            string[] fileds = { "title", "content" };//查询字段  
            //Stopwatch st = new Stopwatch();  
            //st.Start();  
            QueryParser parser = null;// new QueryParser(Lucene.Net.Util.Version.LUCENE_30, field, analyzer);//一个字段查询  
            parser = new MultiFieldQueryParser(version, fileds, analyzer);//多个字段查询  
            Query query = parser.Parse(keyword);  
            int n = 1000;  
            IndexSearcher searcher = new IndexSearcher(directory_luce, true);//true-表示只读  
            TopDocs docs = searcher.Search(query, (Filter)null, n);  
            if (docs == null || docs.TotalHits == 0)  
            {  
                return null;  
            }  
            else  
            {  
                List<MySearchUnit> list = new List<MySearchUnit>();  
                int counter = 1;  
                foreach (ScoreDoc sd in docs.ScoreDocs)//遍历搜索到的结果  
                {  
                    try  
                    {  
                        Document doc = searcher.Doc(sd.Doc);  
                        string id = doc.Get("id");  
                        string title = doc.Get("title");  
                        string content = doc.Get("content");  
                        string flag = doc.Get("flag");  
                        string imageurl = doc.Get("imageurl");  
                        string updatetime = doc.Get("updatetime");  
  
                        string createdate = doc.Get("createdate");  
                        PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");  
                        PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new PanGu.Segment());  
                        highlighter.FragmentSize = 50;  
                        content = highlighter.GetBestFragment(keyword, content);  
                        string titlehighlight = highlighter.GetBestFragment(keyword, title);  
                        if (titlehighlight != "") title = titlehighlight;  
                        list.Add(new MySearchUnit(id, title, content, flag,imageurl, updatetime));  
                    }  
                    catch (Exception ex)  
                    {  
                        Console.WriteLine(ex.Message);  
                    }  
                    counter++;  
                }  
                return list;  
            }  
            //st.Stop();  
            //Response.Write("查询时间：" + st.ElapsedMilliseconds + " 毫秒<br/>");  
  
        }  
        #endregion  
 
        #region 在不同的分类下再根据title和content字段中查询数据(分页)  
        /// <summary>  
        /// 在不同的类型下再根据title和content字段中查询数据(分页)  
        /// </summary>  
        /// <param name="_flag">分类,传空值查询全部</param>  
        /// <param name="keyword"></param>  
        /// <param name="PageIndex"></param>  
        /// <param name="PageSize"></param>  
        /// <param name="TotalCount"></param>  
        /// <returns></returns>  
        public List<MySearchUnit> Search(string _flag,string keyword, int PageIndex, int PageSize, out int TotalCount)  
        {  
            if (PageIndex < 1) PageIndex = 1;  
            //Stopwatch st = new Stopwatch();  
            //st.Start();  
            BooleanQuery bq = new BooleanQuery();  
            if (_flag != "")  
            {  
                QueryParser qpflag = new QueryParser(version, "flag", analyzer);  
                Query qflag = qpflag.Parse(_flag);  
                bq.Add(qflag, Occur.MUST);//与运算  
            }  
            if (keyword != "")  
            {  
                string[] fileds = { "title", "content" };//查询字段  
                QueryParser parser = null;// new QueryParser(version, field, analyzer);//一个字段查询  
                parser = new MultiFieldQueryParser(version, fileds, analyzer);//多个字段查询  
                Query queryKeyword = parser.Parse(keyword);  
                bq.Add(queryKeyword, Occur.MUST);//与运算  
            }  
              
            TopScoreDocCollector collector = TopScoreDocCollector.Create(PageIndex * PageSize, false);  
            IndexSearcher searcher = new IndexSearcher(directory_luce, true);//true-表示只读  
            searcher.Search(bq, collector);  
            if (collector == null || collector.TotalHits == 0)  
            {  
                TotalCount = 0;  
                return null;  
            }  
            else  
            {  
                int start = PageSize * (PageIndex - 1);  
                //结束数  
                int limit = PageSize;  
                ScoreDoc[] hits = collector.TopDocs(start, limit).ScoreDocs;  
                List<MySearchUnit> list = new List<MySearchUnit>();  
                int counter = 1;  
                TotalCount = collector.TotalHits;  
                foreach (ScoreDoc sd in hits)//遍历搜索到的结果  
                {  
                    try  
                    {  
                        Document doc = searcher.Doc(sd.Doc);  
                        string id = doc.Get("id");  
                        string title = doc.Get("title");  
                        string content = doc.Get("content");  
                        string flag = doc.Get("flag");  
                        string imageurl = doc.Get("imageurl");  
                        string updatetime = doc.Get("updatetime");  
  
                        PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");  
                        PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new PanGu.Segment());  
                        highlighter.FragmentSize = 50;  
                        content = highlighter.GetBestFragment(keyword, content);  
                        string titlehighlight = highlighter.GetBestFragment(keyword, title);  
                        if (titlehighlight != "") title = titlehighlight;  
                        list.Add(new MySearchUnit(id, title, content, flag,imageurl, updatetime));  
                    }  
                    catch (Exception ex)  
                    {  
                        Console.WriteLine(ex.Message);  
                    }  
                    counter++;  
                }  
                return list;  
            }  
            //st.Stop();  
            //Response.Write("查询时间：" + st.ElapsedMilliseconds + " 毫秒<br/>");  
  
        }  
        #endregion  
 
        #region 删除索引数据（根据id）  
        /// <summary>  
        /// 删除索引数据（根据id）  
        /// </summary>  
        /// <param name="id"></param>  
        /// <returns></returns>  
        public bool Delete(string id)  
        {  
            bool IsSuccess = false;  
            Term term = new Term("id", id);  
            //Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);  
            //Version version = new Version();  
            //MultiFieldQueryParser parser = new MultiFieldQueryParser(version, new string[] { "name", "job" }, analyzer);//多个字段查询  
            //Query query = parser.Parse("小王");  
  
            //IndexReader reader = IndexReader.Open(directory_luce, false);  
            //reader.DeleteDocuments(term);  
            //Response.Write("删除记录结果： " + reader.HasDeletions + "<br/>");  
            //reader.Dispose();  
  
            IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);  
            writer.DeleteDocuments(term); // writer.DeleteDocuments(term)或者writer.DeleteDocuments(query);  
            ////writer.DeleteAll();  
            writer.Commit();  
            //writer.Optimize();//  
            IsSuccess = writer.HasDeletions();  
            writer.Dispose();  
            return IsSuccess;  
        }  
        #endregion  
 
        #region 删除全部索引数据  
        /// <summary>  
        /// 删除全部索引数据  
        /// </summary>  
        /// <returns></returns>  
        public bool DeleteAll()  
        {  
            bool IsSuccess = true;  
            try  
            {  
                IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);  
                writer.DeleteAll();  
                writer.Commit();  
                //writer.Optimize();//  
                IsSuccess = writer.HasDeletions();  
                writer.Dispose();  
            }  
            catch  
            {  
                IsSuccess = false;  
            }  
            return IsSuccess;  
        }  
        #endregion  
 
        #region directory_luce  
        private Lucene.Net.Store.Directory _directory_luce = null;  
        /// <summary>  
        /// Lucene.Net的目录-参数  
        /// </summary>  
        public Lucene.Net.Store.Directory directory_luce  
        {  
            get  
            {  
                if (_directory_luce == null) _directory_luce = Lucene.Net.Store.FSDirectory.Open(directory);  
                return _directory_luce;  
            }  
        }   
        #endregion  
 
        #region directory  
        private System.IO.DirectoryInfo _directory = null;  
        /// <summary>  
        /// 索引在硬盘上的目录  
        /// </summary>  
        public System.IO.DirectoryInfo directory  
        {  
            get  
            {  
                if (_directory == null)  
                {  
                    string dirPath = AppDomain.CurrentDomain.BaseDirectory + "SearchIndex";  
                    if (System.IO.Directory.Exists(dirPath) == false) _directory = System.IO.Directory.CreateDirectory(dirPath);  
                    else _directory = new System.IO.DirectoryInfo(dirPath);  
                }  
                return _directory;  
            }  
        }   
        #endregion  
 
        #region analyzer  
        private Analyzer _analyzer = null;  
        /// <summary>  
        /// 分析器  
        /// </summary>  
        public Analyzer analyzer  
        {  
            get  
            {  
                //if (_analyzer == null)  
                {  
                    _analyzer = new Lucene.Net.Analysis.PanGu.PanGuAnalyzer();//盘古分词分析器  
                    //_analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);//标准分析器  
                }  
                return _analyzer;  
            }  
        }   
        #endregion  
 
        #region version  
        private static Lucene.Net.Util.Version _version = Lucene.Net.Util.Version.LUCENE_30;  
        /// <summary>  
        /// 版本号枚举类  
        /// </summary>  
        public Lucene.Net.Util.Version version  
        {  
            get  
            {  
                return _version;  
            }  
        }  
        #endregion  
    }  
 
    #region 索引的一个行单元，相当于数据库中的一行数据  
    /// <summary>  
    /// 索引的一个行单元，相当于数据库中的一行数据  
    /// </summary>  
    public class MySearchUnit  
    {  
        public MySearchUnit(string _id, string _title, string _content, string _flag, string _imageurl, string _updatetime)  
        {  
            this.id = _id;  
            this.title = _title;  
            this.content = _content;  
            this.flag = _flag;  
            this.imageurl = _imageurl;  
            this.updatetime = _updatetime;  
        }  
        /// <summary>  
        /// 唯一的id号  
        /// </summary>  
        public string id { get; set; }  
        /// <summary>  
        /// 标题  
        /// </summary>  
        public string title { get; set; }  
        /// <summary>  
        /// 内容  
        /// </summary>  
        public string content { get; set; }  
        /// <summary>  
        /// 其他信息  
        /// </summary>  
        public string flag { get; set; }  
        /// <summary>  
        /// 图片路径  
        /// </summary>  
        public string imageurl { get; set; }  
        /// <summary>  
        /// 时间  
        /// </summary>  
        public string updatetime { get; set; }  
    }   
    #endregion  
}  



//调用测试

[csharp] view plain copy print?
/// <summary>
/// Demo page handler: optionally seeds the index with sample rows, then runs a
/// paged, category-filtered search and writes the result into the page.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // Manual switch: set to true to add the sample rows below to the index.
    bool exec = false;
    if (exec)
    {
        // One shared generator. Separate new Random() instances created back to
        // back can start from the same time-based seed and return the same value.
        Random random = new Random();
        List<MySearchUnit> list = new List<MySearchUnit>();
        list.Add(new MySearchUnit("1a", "标题小王", "今天是小王的生日，大家都很高兴去他家喝酒，玩了一整天。", random.Next(1, 10).ToString(), "", ""));
        list.Add(new MySearchUnit("1b", "标题小张", "今天是小张的生日，大家都很高兴去他家喝酒，玩了几天。", random.Next(1, 10).ToString(), "", ""));
        list.Add(new MySearchUnit("1c", "标题小王", "今天是小王的生日，大家都很高兴去他家喝酒，玩了一整天。", random.Next(1, 10).ToString(), "", ""));
        list.Add(new MySearchUnit("1d", "标题小张", "今天是小张的生日，大家都很高兴去他家喝酒，玩了几天。", random.Next(1, 10).ToString(), "", ""));
        PanGuLuceneHelper.instance.CreateIndex(list);
    }

    int count = 0;
    int PageIndex = 2;
    int PageSize = 4;
    System.Text.StringBuilder html = new System.Text.StringBuilder();

    // Category "3", keywords "小王 生日", second page with four rows per page.
    List<MySearchUnit> searchlist = PanGuLuceneHelper.instance.Search("3", "小王 生日", PageIndex, PageSize, out count);
    html.Append("查询结果：" + count + "条数据<br/>");
    if (searchlist == null || searchlist.Count == 0)
    {
        html.Append("未查询到数据。<br/>");
    }
    else
    {
        foreach (MySearchUnit data in searchlist)
        {
            html.Append(string.Format("id：{0},title：{1},content：{2},flag：{3},updatetime：{4}<br/>", data.id, data.title, data.content, data.flag, data.updatetime));
        }
    }
    html.Append(PanGuLuceneHelper.instance.version);
    div_content.InnerHtml = html.ToString();
}




//效果：




第一版源码示例下载：http://download.csdn.net/detail/pukuimin1226/6768179
  最新源码示例下载：http://download.csdn.net/detail/pukuimin1226/6776049
百度云盘下载链接：http://pan.baidu.com/s/1o69cCD8
Lucene.Net没有判断数据重复性，同一条数据插入多少遍它就有多少条相同的数据，所以，我们人为地用id区分，在数据量大，全部重新创建索引时间长的情况下（数据量到几万以上就耗资源了，从数据库中查询出来，再写入索引，使得数据库和程序本身都增加负担），增量建立索引是很有必要的。
新增一条数据，就直接添加一条索引；
修改一条数据，先删除同一个id的索引（不管有多少个id相同的，都会一次性删除），再添加一条。

数据库中的id建议大家都用guid去掉“-”，还可以加日期“yyyyMMddHHmmss”这样组合，长度一致看起来美观，也充分保证唯一。