package com.jrails.modules.regex; import com.jrails.commons.utils.StringUtils; import org.apache.oro.text.regex.*; import java.util.Map; import java.util.Set; import java.util.WeakHashMap; import java.util.LinkedHashMap; /** * Created by arden * User: <a href="mailto:arden.emily@gmail.com">arden</a> * Date: 2009-2-20 19:35:52 */ public final class OroRegex { /** * 替换 * * @param str * @param reg * @param str2 * @param num * @return */ public static String replace(String str, String reg, String str2, int num) { String result = str; if (num == 0) { num = Util.SUBSTITUTE_ALL; } try { String content = str; String ps1 = reg; PatternCompiler orocom = new Perl5Compiler(); Pattern pattern1 = orocom.compile(ps1); PatternMatcher matcher = new Perl5Matcher(); result = Util.substitute(matcher, pattern1, new Perl5Substitution(str2), content, num); } catch (Exception e) { e.printStackTrace(); } return result; } /** * 过滤Go标签 * * @param content * @param params * @return */ public static String parseGoContent(String content, String params) { String pattern = "(<\\s*go\\s+(?:[^\\s>]\\s*){0,})href\\s*=\\s*(\"|'|)([^\\2\\s>]*)\\2((?:\\s*[^\\s>]){0,}\\s*>)"; return parseContent(content, pattern, params); } /** * 过滤A标签 * * @param content * @param params * @return */ public static String parseLinkContent(String content, String params) { String pattern = "(<\\s*a\\s+(?:[^\\s>]\\s*){0,})href\\s*=\\s*(\"|'|)([^\\2\\s>]*)\\2((?:\\s*[^\\s>]){0,}\\s*>)"; return parseContent(content, pattern, params); } /** * 过滤form标签 * * @param content * @param params * @return */ public static String parseFormContent(String content, String params) { String pattern = "(<\\s*form\\s+(?:[^\\s>]\\s*){0,})action\\s*=\\s*(\"|'|)([^\\2\\s>]*)\\2((?:\\s*[^\\s>]){0,}\\s*>)"; return parseContent(content, pattern, params); } /** * 同时过滤A和Go标签 * * @param content * @param params * @return */ public static String parseWapContent(String content, String params) { //String pattern = "(<\\s*go\\s+(?:[^\\s>]\\s*){0,}|<\\s*a\\s+(?:[^\\s>]\\s*){0,})href\\s*=\\s*(\"|'|)([^\\2\\s>]*)\\2((?:\\s*[^\\s>]){0,}\\s*>)"; //String pattern = "(<\\s*option\\s+(?:[^\\s>]\\s*){0,}|<\\s*go\\s+(?:[^\\s>]\\s*){0,}|<\\s*a\\s+(?:[^\\s>]\\s*){0,})href\\s*=\\s*(\"|'|)([^\\2\\s>]*)\\2((?:\\s*[^\\s>]){0,}\\s*>)"; String pattern = "(<\\s*option\\s+(?:[^\\s>]\\s*){0,}|<\\s*go\\s+(?:[^\\s>]\\s*){0,}|<\\s*frame\\s+(?:[^\\s>]\\s*){0,}|<\\s*form\\s+(?:[^\\s>]\\s*){0,}|<\\s*a\\s+(?:[^\\s>]\\s*){0,})[onpick|href|action|src]\\s*=\\s*(\"|'|)([^\\2\\s>]*)\\2((?:\\s*[^\\s>]){0,}\\s*>)"; return parseContent(content, pattern, params); } /** * 同时过滤A和Go标签 * * @param content * @param params * @return */ public static String parseHtmlContent(String content, String params) { String pattern = "(<\\s*option\\s+(?:[^\\s>]\\s*){0,}|<\\s*go\\s+(?:[^\\s>]\\s*){0,}|<\\s*frame\\s+(?:[^\\s>]\\s*){0,}|<\\s*form\\s+(?:[^\\s>]\\s*){0,}|<\\s*a\\s+(?:[^\\s>]\\s*){0,})[onpick|href|action|src]\\s*=\\s*(\"|'|)([^\\2\\s>]*)\\2((?:\\s*[^\\s>]){0,}\\s*>)"; return parseContent(content, pattern, params); } /** * 过滤指定内容 * * @param content * @param pattern * @param params * @return */ private static String parseContent(String content, String pattern, String params) { String orignContent = content; String linkTagPatternStr = pattern; String hrefPatternStr = "(src='([^\"]+)')|(src=\"([^\"]+)\")|(action=\"([^\"]+)\")|(action='([^\"]+)')|(href=\"([^\"]+)\")|(href='([^\"]+)')|(onpick=\"([^\"]+)\")|(onpick='([^\"]+)')"; //String hrefPatternStr = "([href|action|src]\\s*=\\s*(\"|'|)([^\\2\\s>]*)\\2((?:\\s*[^\\s>]){0,}\\s*>))"; PatternCompiler complier = new Perl5Compiler(); PatternMatcher matcher = new Perl5Matcher(); try { Pattern linkPattern = complier.compile(linkTagPatternStr, Perl5Compiler.CASE_INSENSITIVE_MASK); Pattern hrefPattern = complier.compile(hrefPatternStr, Perl5Compiler.CASE_INSENSITIVE_MASK); PatternMatcherInput linkInput = new PatternMatcherInput(content); int count = content.length() / 3; int i = 0; while (matcher.contains(linkInput, linkPattern)) { i++; MatchResult match = matcher.getMatch(); String linkContent = match.toString(); //System.out.println("=========1" + linkContent); // 处理href部份 PatternMatcherInput hrefInput = new PatternMatcherInput(linkContent); if (matcher.contains(hrefInput, hrefPattern)) { match = matcher.getMatch(); String hrefContent = match.toString(); //System.out.println("=========2" + hrefContent); //String targetHrefContent = parseLink(hrefContent, params); String targetHrefContent = analyseLinkContent(hrefContent, params); content = org.apache.commons.lang.StringUtils.replace(content, hrefContent, targetHrefContent); } // 主要担心死循环 if (i >= count) break; } } catch (Exception e) { //e.printStackTrace(); return orignContent; } return content; } public static void main(String[] args) throws MalformedPatternException { //String link = "<option onpick=\"/soft/top?mid=22222&cid=1111\" value=\"05\">生活软件</option>"; //String link = "<option onpick=\"/soft/top\" value=\"05\">生活软件</option>"; //String link = "<option onpick=\"#card\" value=\"05\">生活软件</option>"; String link = "<a href=\"/soft/china/company?\">网秦</a>"; String params = "mid=12&cid=22&id=999"; String s = parseWapContent(link, params); System.out.println(s); } /** * 分析Href中的内容 * * @param linkContent * @param params * @return */ private static String analyseLinkContent(String linkContent, String params) { String[] splits = linkContent.split("href="); String prefix = "href"; if (linkContent.startsWith("action=") || linkContent.startsWith("ACTION=")) { splits = linkContent.split("action="); prefix = "action"; } else if (linkContent.startsWith("src=") || linkContent.startsWith("SRC=")) { splits = linkContent.split("src="); prefix = "src"; } else if (linkContent.startsWith("href=") || linkContent.startsWith("HREF=")) { splits = linkContent.split("href="); prefix = "href"; } else if (linkContent.startsWith("onpick=") || linkContent.startsWith("ONPICK=")) { splits = linkContent.split("onpick="); prefix = "onpick"; } if (splits != null && splits.length == 2) { String content = splits[1]; if (content.startsWith("\"#") || content.startsWith("'#")) { return linkContent; } //System.out.println("content:" + content); int index = content.indexOf("?"); // 查询参数 String queryString = content; Map<String, String> orignQuerys = new LinkedHashMap<String, String>(); if (index > 0) { queryString = content.substring(index + 1, content.length() - 1); //System.out.println("queryString:" + queryString); orignQuerys = parseQueryString(queryString); } Map<String, String> targetQuerys = parseQueryString(params); String queryParams = buildQueryParams(orignQuerys, targetQuerys); //System.out.println("queryParams:" + queryParams); String[] querySplits = content.split("\\?"); if (querySplits != null && querySplits.length >= 1) { content = querySplits[0] + "?" + queryParams; } else { content += "?" + queryParams; } content = content.replaceAll("\"", ""); content = content.replaceAll("'", ""); String targetLink = prefix + "=\"" + content + "\""; //System.out.println("targetLink:" + targetLink); return targetLink; } return linkContent; } /** * 分析查询参数 * * @param queryString * @return */ private static Map<String, String> parseQueryString(String queryString) { Map<String, String> querys = new LinkedHashMap<String, String>(); if (!StringUtils.isEmpty(queryString)) { String[] splits = queryString.split("&"); if (splits != null) { for (String s : splits) { String[] queryValue = s.split("="); if (queryValue != null && queryValue.length >= 1) { String key = queryValue[0]; String value = ""; if (queryValue.length == 2) { value = queryValue[1]; } //System.out.println("key:value:" + key + ":" + value); querys.put(key, value); } } } } return querys; } /** * 构造查询参数 * * @param orignQuerys * @param targetQuerys * @return */ private static String buildQueryParams(Map<String, String> orignQuerys, Map<String, String> targetQuerys) { String queryParams = ""; Set<String> keys = null; keys = targetQuerys.keySet(); for (String key : keys) { String orignValue = orignQuerys.get(key); String targetValue = targetQuerys.get(key); //System.out.println("key:" + key); if (!(orignValue != null && !StringUtils.isEmpty(orignValue))) { // 原来就有这个参数 orignQuerys.put(key, targetValue); } else if (orignValue == null) { orignQuerys.put(key, targetValue); } } keys = orignQuerys.keySet(); int index = 0; int size = keys.size(); for (String key : keys) { index++; String value = orignQuerys.get(key); if (index < size) { queryParams += key + "=" + value + "&"; } else { queryParams += key + "=" + value; } } return queryParams; } }