JSP采集代码
<%@ page contentType="text/html;charset=gb2312" language="java" import="java.util.regex.*" errorPage="" %>
<%
    // Page-scraping example: downloads the HTML of a fixed URL and prints every
    // href="..." and href='...' attribute found in it, one per line.
    //
    // Output format (one line per match, numbered by a single running counter
    // shared by both passes):
    //   m.group(N):href="..."<br>     for double-quoted links
    //   m2.group(N):href='...'<br>    for single-quoted links

    final String targetUrl = "http://www.baidu.com";
    // Bound the time this page can block on the remote host.
    final int timeoutMillis = 10000;

    // ---- 1. Download the remote page into memory -------------------------
    StringBuilder pageText = new StringBuilder();
    java.net.HttpURLConnection connection =
            (java.net.HttpURLConnection) new java.net.URL(targetUrl).openConnection();
    connection.setConnectTimeout(timeoutMillis);
    connection.setReadTimeout(timeoutMillis);

    java.io.BufferedReader reader = null;
    try {
        connection.connect();

        // Decode with the charset announced in the Content-Type response header
        // instead of the platform default; fall back to UTF-8 when the header is
        // missing or names a charset this JVM does not know.
        java.nio.charset.Charset charset = java.nio.charset.Charset.forName("UTF-8");
        String contentTypeHeader = connection.getContentType();
        if (contentTypeHeader != null) {
            Matcher charsetMatcher = Pattern
                    .compile("charset\\s*=\\s*\"?([^\\s;\"]+)", Pattern.CASE_INSENSITIVE)
                    .matcher(contentTypeHeader);
            if (charsetMatcher.find()) {
                try {
                    charset = java.nio.charset.Charset.forName(charsetMatcher.group(1));
                } catch (IllegalArgumentException ignored) {
                    // Illegal or unsupported charset name: keep the UTF-8 fallback.
                }
            }
        }

        reader = new java.io.BufferedReader(
                new java.io.InputStreamReader(connection.getInputStream(), charset));
        String line;
        // readLine() strips line terminators, so the lines are joined with no
        // separator between them.
        while ((line = reader.readLine()) != null) {
            pageText.append(line);
        }
    } finally {
        // Always release the stream and the connection, even if reading failed.
        try {
            if (reader != null) {
                reader.close();
            }
        } finally {
            connection.disconnect();
        }
    }

    // ---- 2. Extract and print the links ----------------------------------
    // First pass finds double-quoted links, second pass single-quoted ones.
    String[] labels = { "m", "m2" };
    String[] hrefRegexes = {
        "href=\"([^\"]*)\"",   // href="..."
        "href='([^']*)'"       // href='...'
    };

    int matchCount = 0; // running counter, deliberately not reset between passes
    for (int i = 0; i < hrefRegexes.length; i++) {
        // CASE_INSENSITIVE so that HREF= / Href= are matched as well.
        Matcher hrefMatcher =
                Pattern.compile(hrefRegexes[i], Pattern.CASE_INSENSITIVE).matcher(pageText);
        while (hrefMatcher.find()) {
            matchCount++;
            // The matched text comes from a remote page; escape HTML
            // metacharacters so it is displayed as text rather than
            // interpreted as markup by the browser.
            String escaped = hrefMatcher.group(0)
                    .replace("&", "&amp;")
                    .replace("<", "&lt;")
                    .replace(">", "&gt;");
            out.println(labels[i] + ".group(" + matchCount + "):" + escaped + "<br>\n");
        }
    }
%>