package com.mite8.service; import com.mite8.utils.CleanStr; import com.mite8.utils.ansj_util.LoadDictionary; import com.mite8.utils.ansj_util.ResultFilter; import com.mite8.utils.ansj_util.SegBrandSpeOpt; import com.mite8.utils.ansj_util.WordNatureFilter; import net.sf.json.JSONArray; import net.sf.json.JSONObject; import org.ansj.domain.Result; import org.ansj.recognition.impl.FilterRecognition; import org.ansj.splitWord.analysis.SeeDicAnalysis; import org.nlpcn.commons.lang.tire.domain.Forest; import org.springframework.stereotype.Service; /** * Author: blogchong * Created: 2016/7/14 * Email: blogchong#qq.com * 公众号:数据虫巢 ID:blogchong * Des: 分词服务service */ @Service public class SegService { public String segWord(String type, String stopFlag, String body, String ruleFlag, String ambigFlag, String synonFlag, String segDic, String ambigDic, String stopDic, String synonDic, String cleanFlag, String natureRule) { String resultStr = ""; Forest seeForest = LoadDictionary.dicSegMap.get(segDic); //判断是否需要进行预处理清理body if(cleanFlag.equals("true")) { body = CleanStr.cleanStr(body); } Result result = null; if (ambigFlag.equals("true")) { Forest ambigForest = LoadDictionary.dicAmbigMap.get(ambigDic); result = SeeDicAnalysis.parse(seeForest, ambigForest, body); } else { result = SeeDicAnalysis.parse(seeForest, body); } //对于brand进行专门处理 if(segDic.equals("brand")) { result = SegBrandSpeOpt.segBrandSpeOpt(result, "brand"); } //是否使用默认的停用过滤 if (stopFlag.equals("true")) { FilterRecognition filter = new FilterRecognition(); filter.insertStopWords(LoadDictionary.dicStopMap.get(stopDic)); result = result.recognition(filter); } //是否使用nature过滤规则 if(natureRule.equals("default")) { //使用默认的nature过滤器 FilterRecognition filter = new FilterRecognition(); filter = WordNatureFilter.wordNatureFilter(filter); result = result.recognition(filter); } else if (natureRule.contains("specify_")) { //过滤指定的nature String[] pars = natureRule.split("_"); if (pars.length == 2) { String[] natureFilters = pars[1].split(","); ResultFilter resultFilter = new ResultFilter(); for (String nature: natureFilters) { resultFilter.addNatureFilterByNature(nature); } result = resultFilter.resultFilterBySpecifyNature(result); } } //是否执行规则过滤 if (ruleFlag.equals("true")) { result = ResultFilter.resultFilterByRule(result); } //是否进行同义词合并 if (synonFlag.equals("true")) { result = ResultFilter.resultFilterBySynon(result, LoadDictionary.dicSynonMap.get(synonDic), synonDic); } //返回形式: 简洁模式 还是详情模式 if (type.equals("simple")) { for (int i = 0; i < result.size(); i++) { String word = result.get(i).getName(); if (resultStr.equals("")) { resultStr = word; } else { resultStr = resultStr + " " + word; } } } else if (type.equals("details")) { JSONObject jsonObject = new JSONObject(); JSONArray jsonArray = new JSONArray(); for (int i = 0; i < result.size(); i++) { String word = result.get(i).getName(); String nature = result.get(i).getNatureStr(); JSONObject jsonObjectTmp = new JSONObject(); jsonObjectTmp.put("word", word); jsonObjectTmp.put("nature", nature); jsonArray.add(jsonObjectTmp); } jsonObject.put("size", result.size()); jsonObject.put("word_list", jsonArray); resultStr = jsonObject.toString(); } return resultStr; } }