package com.mite8.jx.gz.dn.utils;
import com.mite8.jx.gz.dn.entity.EmotionEntity;
import org.nlpcn.commons.lang.tire.domain.Forest;
import org.nlpcn.commons.lang.tire.domain.Value;
import org.nlpcn.commons.lang.util.StringUtil;
import org.springframework.stereotype.Service;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Logger;
/**
* Author: blogchong
* Time: 2016/10/14.
* Email: blogchong#qq.com
* 公众号:数据虫巢 ID:blogchong
* Desc: 情感字典的自动加载。
*/
@Service
public class LoadEmotionDictionary {
private static final Logger logger = Logger.getLogger(LoadEmotionDictionary.class.getName());
public static Map<String, EmotionEntity> dicEmotion = new HashMap<>();
public LoadEmotionDictionary() throws Exception{
loadEmotionDic("/dic/emotion/emotion.dic", "emotion");
}
//加载分词字典
private static Forest loadEmotionDic(String path, String nature) throws Exception{
Forest forest = new Forest();
InputStream input = LoadEmotionDictionary.class.getResourceAsStream(path);
BufferedReader br = new BufferedReader(new InputStreamReader(input));
String temp = null;
String[] strs;
Value value;
int count = 0;
int errorCount = 0;
int errorCountOut = 0;
try {
while ((temp = br.readLine()) != null) {
//获取到resource中的每行
if (StringUtil.isNotBlank(temp)) {
temp = StringUtil.trim(temp);
strs = temp.split("\t");
if(strs.length >= 7) {
try {
EmotionEntity emotionEntity = new EmotionEntity();
emotionEntity.setWord(strs[0]);
emotionEntity.setType(strs[4]);
emotionEntity.setStrength(Integer.parseInt(strs[5]));
emotionEntity.setPolar(Integer.parseInt(strs[6]));
dicEmotion.put(strs[0], emotionEntity);
count++;
} catch (Exception e){
errorCount++;
}
} else {
// logger.info("ERROR - DIC-EMOTION {"+ temp +"} : + " + strs.toString());
}
}
}
br.close();
} catch (IOException e) {
System.err.println("READ DIC-EMOTION ERROR: " + e);
e.printStackTrace();
}
logger.info("###[DIC-EMOTION]###The dic of Emotion[" + path + "] is loaded, errorOut["+errorCountOut+"],error["+ errorCount +"],the num of dic is: " + count);
return forest;
}
}