在这里,经过我跟同事一起商量,决定由服务端为客户端定义一种中间格式, 包括所有需要的元素类型: TEXT, IMG, A, 以及换行(\n).
所以我在服务端定义了一个接口和几个实现类.
接口: IMediaObject.java
/**
 * Common contract for every element of the intermediate format that the
 * server produces for the client.
 *
 * <p>Implementations identify themselves through {@link #getType()}, which
 * returns one of the {@code TYPE_*} codes declared here.
 *
 * @author Vernon.Chen
 * @version 1.0 2013-8-23
 */
public interface IMediaObject {

    /** Type code for a hyperlink element. */
    int TYPE_A = 1;

    /** Type code for an image element. */
    int TYPE_IMAGE = 2;

    /** Type code for a line-break element. */
    int TYPE_BR = 3;

    /** Type code for a plain-text element. */
    int TYPE_TEXT = 4;

    /**
     * Reports which kind of element this object is.
     *
     * @return one of the {@code TYPE_*} constants
     */
    int getType();
}
类: TextObject.java
/**
 * Intermediate-format element carrying a run of plain text.
 *
 * @author Vernon.Chen
 * @version 1.0 2013-8-23
 */
public class TextObject implements IMediaObject {

    /** The text content; {@code null} until assigned. */
    private String text;

    /** Creates an element with no text assigned. */
    public TextObject() {
        this(null);
    }

    /**
     * Creates an element holding the given text.
     *
     * @param text the text content
     */
    public TextObject(String text) {
        this.text = text;
    }

    /**
     * @return the text content
     */
    public String getText() {
        return this.text;
    }

    /**
     * @param text the text content to store
     */
    public void setText(String text) {
        this.text = text;
    }

    /**
     * @return {@link IMediaObject#TYPE_TEXT}
     */
    @Override
    public int getType() {
        return IMediaObject.TYPE_TEXT;
    }
}
类:ImageObject.java
/**
 * Intermediate-format element describing an image.
 *
 * @author Vernon.Chen
 * @version 1.0 2013-8-23
 */
public class ImageObject implements IMediaObject {

    /** Value of the image's {@code src}; {@code null} until assigned. */
    private String src;

    /** Identifier associated with the image; {@code null} until assigned. */
    private String imageId;

    /** Creates an image element with neither source nor id assigned. */
    public ImageObject() {
        this(null, null);
    }

    /**
     * Creates an image element with a source and no id.
     *
     * @param src the image source
     */
    public ImageObject(String src) {
        this(src, null);
    }

    /**
     * Creates an image element with both source and id.
     *
     * @param src     the image source
     * @param imageId the image identifier
     */
    public ImageObject(String src, String imageId) {
        this.src = src;
        this.imageId = imageId;
    }

    /**
     * @return the image source
     */
    public String getSrc() {
        return this.src;
    }

    /**
     * @param src the image source to store
     */
    public void setSrc(String src) {
        this.src = src;
    }

    /**
     * @return {@link IMediaObject#TYPE_IMAGE}
     */
    @Override
    public int getType() {
        return IMediaObject.TYPE_IMAGE;
    }

    /**
     * @return the image identifier
     */
    public String getImageId() {
        return this.imageId;
    }

    /**
     * @param imageId the image identifier to store
     */
    public void setImageId(String imageId) {
        this.imageId = imageId;
    }
}
类:AObject.java
/**
 * Intermediate-format element describing a hyperlink.
 *
 * @author Vernon.Chen
 * @version 1.0 2013-8-23
 */
public class AObject implements IMediaObject {

    /** Link target; {@code null} until assigned. */
    private String href;

    /** Display name of the link; {@code null} until assigned. */
    private String name;

    /** Creates a link element with neither target nor name assigned. */
    public AObject() {
        this(null, null);
    }

    /**
     * Creates a link element.
     *
     * @param href the link target
     * @param name the display name of the link
     */
    public AObject(String href, String name) {
        this.href = href;
        this.name = name;
    }

    /**
     * @return the link target
     */
    public String getHref() {
        return this.href;
    }

    /**
     * @param href the link target to store
     */
    public void setHref(String href) {
        this.href = href;
    }

    /**
     * @return {@link IMediaObject#TYPE_A}
     */
    @Override
    public int getType() {
        return IMediaObject.TYPE_A;
    }

    /**
     * @return the display name of the link
     */
    public String getName() {
        return this.name;
    }

    /**
     * @param name the display name to store
     */
    public void setName(String name) {
        this.name = name;
    }
}
类:BrObject.java
/**
 * Intermediate-format element representing a line break.
 *
 * @author Vernon.Chen
 * @version 1.0 2013-8-23
 */
public class BrObject implements IMediaObject {

    /** Value every new instance starts with: a single newline character. */
    private static final String DEFAULT_NAME = "\n";

    /** Name of this element; starts as a newline and may be overwritten. */
    private String name = DEFAULT_NAME;

    /**
     * @return the current name of this element
     */
    public String getName() {
        return this.name;
    }

    /**
     * @param name the name to store
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * @return {@link IMediaObject#TYPE_BR}
     */
    @Override
    public int getType() {
        return IMediaObject.TYPE_BR;
    }
}
以上就是这几种格式.
然后,我们用Jsoup的包去解析文本,并且转换成我们的中间格式,最终用JSON数组的格式返回给客户端. 客户端只要按照JSON的顺序解析出来就可以了.
核心解析代码,利用了递归算法.
/**
 * Mutable state carried through one parse: the elements produced so far and
 * a buffer of text that has not yet been turned into an element.
 *
 * @author Vernon.Chen
 *
 */
static class ParseContext {

    /** Text collected since the last flush. */
    StringBuilder sb = new StringBuilder();

    /** Elements produced so far, in the order they were added. */
    List<IMediaObject> list = new ArrayList<IMediaObject>();

    /**
     * Flushes the text buffer: if it holds anything, appends it to
     * {@link #list} as one {@link TextObject} and empties the buffer.
     * Does nothing when the buffer is empty.
     */
    public void closeLast() {
        if (sb.length() == 0) {
            return;
        }
        final String pending = sb.toString();
        list.add(new TextObject(pending));
        sb.setLength(0);
    }
}
/**
 * Parses HTML text into the list of intermediate-format elements.
 *
 * <p>The text is parsed with Jsoup, the children of the resulting body are
 * walked recursively, and any text still buffered at the end is flushed as
 * a final {@link TextObject}.
 *
 * @param source the HTML text to parse
 * @return the elements in the order they were encountered; the shared
 *         immutable empty list when {@code StringUtil.isEmpty(source)} holds
 * @author Vernon.Chen
 * @date 2013-8-27
 */
public static List<IMediaObject> parse(String source) {
    if (StringUtil.isEmpty(source)) {
        // Typed factory instead of the raw Collections.EMPTY_LIST, so no
        // unchecked-conversion warning has to be suppressed on this method.
        return Collections.<IMediaObject>emptyList();
    }
    Document doc = Jsoup.parse(source);
    ParseContext pc = new ParseContext();
    parseElementChildren(pc, doc.body());
    // Text after the last structural element is still sitting in the buffer.
    pc.closeLast();
    return pc.list;
}
/**
 * Recursively walks the child nodes of {@code element} and records them in
 * {@code parseContext}.
 *
 * <ul>
 * <li>{@code #text}: appended to the text buffer, so consecutive text nodes
 * end up in a single {@link TextObject};</li>
 * <li>{@code br}: flushes buffered text, then adds a {@link BrObject};</li>
 * <li>{@code a}: delegated to {@code parseA};</li>
 * <li>{@code img}: flushes buffered text, then adds an {@link ImageObject};</li>
 * <li>{@code p}: children are walked first, then buffered text is flushed
 * and a {@link BrObject} is added;</li>
 * <li>anything else: children are walked when the node is an element.</li>
 * </ul>
 *
 * @param parseContext accumulator receiving the parsed elements
 * @param element      element whose children are to be processed
 * @author Vernon.Chen
 * @date 2013-8-27
 */
private static void parseElementChildren(ParseContext parseContext, Element element) {
    for (Node child : element.childNodes()) {
        final String tag = child.nodeName();

        if ("#text".equals(tag)) {
            // Only buffered here; the flush happens at the next structural element.
            parseContext.sb.append(((TextNode) child).text());
            continue;
        }
        if ("br".equals(tag)) {
            parseContext.closeLast();
            parseContext.list.add(new BrObject());
            continue;
        }
        if ("a".equals(tag)) {
            parseA(parseContext, child);
            continue;
        }
        if ("img".equals(tag)) {
            parseContext.closeLast();
            final String src = ((Element) child).attr("src");
            parseContext.list.add(new ImageObject(src, extractImageId(src)));
            continue;
        }

        // Every remaining tag, "p" included, is descended into.
        if (child instanceof Element) {
            parseElementChildren(parseContext, (Element) child);
        }
        // A paragraph additionally ends with a line break.
        if ("p".equals(tag)) {
            parseContext.closeLast();
            parseContext.list.add(new BrObject());
        }
    }
}

/**
 * Derives the image id from an image source.
 *
 * <p>An id is only produced when {@code src} is non-empty and contains
 * {@code "dianziq"}: it is everything after the last {@code '_'}, or, when
 * there is no underscore, everything after the last {@code '/'}.
 * Example from the original notes: {@code http://img./img/xl_7vA7fe.png}.
 *
 * @param src value of the image's {@code src} attribute
 * @return the derived id, or the empty string when none applies
 */
private static String extractImageId(String src) {
    if (!StringUtil.isNotEmpty(src) || src.indexOf("dianziq") < 0) {
        return "";
    }
    final int underscore = src.lastIndexOf('_');
    final int cut = underscore >= 0 ? underscore : src.lastIndexOf('/');
    return src.substring(cut + 1);
}
/**
 * Handles an {@code a} node.
 *
 * <p>Buffered text is flushed first. If the anchor contains no {@code img},
 * it is recorded as one {@link AObject} built from its {@code href}
 * attribute and its text. If it does contain an {@code img}, no
 * {@link AObject} is created; the anchor's children are processed through
 * {@code parseElementChildren} instead.
 *
 * @param parseContext accumulator receiving the parsed elements
 * @param node         the anchor node; cast to {@link Element}
 * @author Vernon.Chen
 * @date 2013-8-27
 */
private static void parseA(ParseContext parseContext, Node node) {
    parseContext.closeLast();

    final Element anchor = (Element) node;
    final Elements images = anchor.select("img");

    if (images == null || images.isEmpty()) {
        // Plain link: a single element carrying target and text.
        parseContext.list.add(new AObject(anchor.attr("href"), anchor.text()));
        return;
    }

    // Link wrapping an image: walk the children (a no-op if there are none).
    parseElementChildren(parseContext, anchor);
}
最后我们如何调用呢?
/**
 * Demo entry point: builds a sample HTML fragment mixing links and images,
 * parses it, and prints the result as a JSON array.
 *
 * @param args unused
 */
public static void main(String[] args) {
    StringBuilder html = new StringBuilder()
            .append("<a title='sadfas' href='http://www.sdfasdfsadfds' target='_blanck'>我是</a>")
            .append("<img data-addr='http://192.168.1.29/wendaimg/77d41de8917e4511c444e1f7facb86b79fac1f2b.jpg' dzq-style='max-width: 500px;' dzq='true' src='http://192.168.1.29/wendaimg/77d41de8917e4511c444e1f7facb86b79fac1f2b.jpg' style='max-width: 500px;'/>")
            .append("<a title=sadfas href=http://www.sdfasdfsadfds target=_blanck>喜欢</a>")
            .append("<a title=sadfas href=http://www.sdfasdfsadfds target=_blanck>喜欢<strong>eee</strong><div>111111<img src='http://img./img/xl_7vA7fe.png'/></div></a>")
            .append("<a title=sadfas href=http://www.sdfasdfsadfds target=_blanck>喜欢</a>");

    // parse is static: call it on the class rather than through a throwaway instance.
    List<IMediaObject> list = DzqJsoup.parse(html.toString());
    System.out.println(JSONArray.fromObject(list));
}
这样子就可以了, 可能这种方式不是最好的,但这也是我现在能够想到的. 如果各位有更好的方式, 请联系我. 谢谢~
|