国庆闲暇时间,写了一个抓取 CSDN 文章的工具。采用了一些简单的算法,希望 CSDN 不要见怪。本来想实现图片自动上传,但是没有空,连文章的 doc 说明也没有仔细写。:) 开发工具:Eclipse 3.0 工作平台:Windows XP
  /************************************************  * <p>csdn文章采集工具</p>  * <p>csdn文章采集工具</p>  * <p>CreateData: 2004-10-3  19:59:54</p>  * <p>Description:</p>  * <p>Copyright: Copyright (c) 2004</p>  * <p>Company: 秋水工作室</p>  * @author 王凯  * @version 1.0  ***********************************************/ import java.net.*; import java.sql.*; import java.io.*; 
/**
 * Small scraper that downloads CSDN article pages, cuts the title, keywords
 * and content out of the HTML by plain substring search, and saves each
 * article as a local {@code .htm} file. An optional helper can also insert an
 * article into a database through the JDBC-ODBC data source {@code csdn}.
 */
public class OpenUrl {

    /** Characters that Windows does not allow inside a file name. */
    private static final String ILLEGAL_FILE_NAME_CHARS = "\\/:*?\"<>|";

    /** Marker that precedes every article link on the article list page. */
    private static final String ARTICLE_MARKER = "article/";

    /**
     * Downloads the page at the given address and returns its text.
     *
     * <p>Lines are re-joined with {@code "\r\n"}. The bytes are decoded with
     * the platform default charset, because no charset is passed to the
     * reader.
     *
     * @param strUrl address of the page to download
     * @return the page text, or {@code "error open url" + strUrl} when the
     *         page cannot be opened or read (no exception is propagated)
     */
    public String getContent(String strUrl) {
        BufferedReader br = null;
        try {
            URL url = new URL(strUrl);
            br = new BufferedReader(new InputStreamReader(url.openStream()));
            StringBuffer sb = new StringBuffer();
            String s;
            while ((s = br.readLine()) != null) {
                sb.append(s).append("\r\n");
            }
            return sb.toString();
        } catch (Exception e) {
            // Deliberate best-effort: callers receive a marker string instead of an exception.
            return "error open url" + strUrl;
        } finally {
            // Close the stream on the failure path too, so a broken download cannot leak it.
            if (br != null) {
                try {
                    br.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
    }

    /**
     * Downloads one article page, extracts title, keywords and content, and
     * writes them to {@code c:\csnd\<addname>\<title>.htm}.
     *
     * <p>Each field is the text between a fixed {@code <span id="...">}
     * opening tag and the first following {@code </span>}, so a field that
     * itself contains a {@code </span>} is cut short there. To store the
     * article in the database as well, pass the extracted values to
     * {@link #addnew(String, String, String, String)}.
     *
     * @param Path    full URL of the article page
     * @param addname sub-directory below {@code c:\csnd\} to write into
     * @param names   file name part of the article URL (currently not used)
     * @return the generated page text that was written to disk
     */
    public static String GetNews(String Path, String addname, String names) {
        OpenUrl ou = new OpenUrl();
        String htmlbody = ou.getContent(Path);
        String title = GetSkip(htmlbody,
                "<span id=\"ArticleTitle1_ArticleTitle1_lblTitle\">", "</span>");
        String aboutkey = GetSkip(htmlbody,
                "<span id=\"ArticleTitle1_ArticleTitle1_lblKeywords\">", "</span>");
        String content = GetSkip(htmlbody,
                "<span id=\"ArticleContent1_ArticleContent1_lblContent\">", "</span>");
        System.out.println("title=" + title);

        // NOTE(review): the template below is not well-formed HTML (the title sits
        // inside <body> and the final <body> is never closed). It is kept as-is so
        // that files produced by earlier runs and by this version stay identical.
        String body = "<html><body><title>" + title + "</title></body></html>"
                + "<body><csdntitle>标题:" + title + "</csdntitle><br>"
                + "<csdnaboutkey>" + aboutkey + "</csdnaboutkey>"
                + "<csdnbody>" + content + "</csdnbody>" + "<body></html>";

        // The title comes from a remote page, so make it safe to use as a file name.
        OpenUrl.scwj("c:\\csnd\\" + addname, toFileName(title) + ".htm", body);
        return body;
    }

    /**
     * Replaces characters that are illegal in Windows file names (and control
     * characters) with an underscore.
     */
    private static String toFileName(String title) {
        StringBuffer sb = new StringBuffer(title.length());
        for (int i = 0; i < title.length(); i++) {
            char c = title.charAt(i);
            if (c < ' ' || ILLEGAL_FILE_NAME_CHARS.indexOf(c) >= 0) {
                sb.append('_');
            } else {
                sb.append(c);
            }
        }
        return sb.toString();
    }

    /**
     * Strips paragraph and span markup from an HTML fragment: every opening
     * tag that starts with {@code "<P "} or {@code "<SPAN "} (upper case, with
     * attributes) is removed up to its closing {@code '>'}, and every
     * {@code </SPAN>} and {@code </P>} is removed. Attribute-less {@code <P>}
     * and {@code <SPAN>} tags are left untouched.
     *
     * @param body HTML fragment to clean
     * @return the fragment without the tags described above
     */
    public static String skipp(String body) {
        System.out.println("skipi=" + body.indexOf("<P "));
        body = removeOpeningTags(body, "<P ");
        body = removeOpeningTags(body, "<SPAN ");
        // Strings are immutable: the result of replaceAll must be assigned back.
        body = body.replaceAll("</SPAN>", "");
        body = body.replaceAll("</P>", "");
        return body;
    }

    /**
     * Removes every occurrence of {@code tagStart ... '>'} from the text. A
     * tag start that is never closed is left in place and ends the scan, which
     * also guarantees that the loop terminates.
     */
    private static String removeOpeningTags(String text, String tagStart) {
        int open = text.indexOf(tagStart);
        while (open >= 0) {
            int close = text.indexOf('>', open);
            if (close < 0) {
                break;
            }
            text = text.substring(0, open) + text.substring(close + 1);
            open = text.indexOf(tagStart);
        }
        return text;
    }

    /**
     * Returns the text between the first occurrence of {@code spath} and the
     * first occurrence of {@code ePath} that follows it.
     *
     * @param body  text to search
     * @param spath start marker (not included in the result)
     * @param ePath end marker (not included in the result)
     * @return the enclosed text, or an empty string when either marker is
     *         missing or any argument is {@code null}
     */
    public static String GetSkip(String body, String spath, String ePath) {
        if (body == null || spath == null || ePath == null) {
            return "";
        }
        int start = body.indexOf(spath);
        if (start < 0) {
            return "";
        }
        int from = start + spath.length();
        int end = body.indexOf(ePath, from);
        if (end < 0) {
            return "";
        }
        return body.substring(from, end);
    }

    /**
     * Usage example: walks the article list page, and for every link of the
     * form {@code article/<dir>/<file>" target=} downloads the article with
     * {@link #GetNews(String, String, String)} into the sub-directory
     * {@code <dir>}.
     */
    public static void test2() {
        OpenUrl ou = new OpenUrl();
        String htmlbody = ou.getContent("http://dev.csdn.net/articlelist.aspx?c=6");
        int at = htmlbody.indexOf(ARTICLE_MARKER);
        while (at >= 0) {
            htmlbody = htmlbody.substring(at + ARTICLE_MARKER.length());
            int end = htmlbody.indexOf("\" target=");
            if (end < 0) {
                // No terminated link is left; stop instead of failing in substring().
                break;
            }
            String names = htmlbody.substring(0, end);
            String path = "http://dev.csdn.net/article/" + names;
            System.out.println(path);
            int i = names.indexOf("/");
            String addname = "";
            if (i >= 0) {
                // Use the whole directory segment ("40" of "40/40149.shtm"),
                // not just its first character.
                addname = names.substring(0, i);
                names = names.substring(i + 1);
            }
            OpenUrl.GetNews(path, addname, names);
            at = htmlbody.indexOf(ARTICLE_MARKER);
        }
    }

    /**
     * Reference implementation for storing an article in the {@code develop}
     * table of the ODBC data source {@code csdn} (an Access database in the
     * original setup).
     *
     * <p>Values are bound as statement parameters, so quotes inside the
     * article text need no manual escaping.
     *
     * @param title    article title
     * @param aboutkey article keywords
     * @param pathurl  URL the article was downloaded from
     * @param body     article content
     * @return {@code true} when a row was inserted, {@code false} otherwise
     *         (SQL errors are printed, not thrown)
     */
    public static boolean addnew(String title, String aboutkey, String pathurl, String body) {
        try {
            Class.forName("sun.jdbc.odbc.JdbcOdbcDriver");
        } catch (ClassNotFoundException e) {
            System.out.print("驱动程序不存在");
        }

        boolean addok = false;
        Connection odbcconn = null;
        PreparedStatement odbcstmt = null;
        try {
            odbcconn = DriverManager.getConnection("jdbc:odbc:csdn");
            odbcstmt = odbcconn.prepareStatement(
                    "insert into develop (title,aboutkey,pathurl,body) values (?,?,?,?)");
            odbcstmt.setString(1, title);
            odbcstmt.setString(2, aboutkey);
            odbcstmt.setString(3, pathurl);
            odbcstmt.setString(4, body);
            // execute() reports whether a ResultSet was produced, which is never the
            // case for an INSERT; the affected row count is the real success signal.
            addok = odbcstmt.executeUpdate() > 0;
        } catch (SQLException e) {
            System.out.print(e);
        } finally {
            if (odbcstmt != null) {
                try {
                    odbcstmt.close();
                } catch (SQLException ignored) {
                    // Closing is best-effort.
                }
            }
            if (odbcconn != null) {
                try {
                    odbcconn.close();
                } catch (SQLException ignored) {
                    // Closing is best-effort.
                }
            }
        }
        return addok;
    }

    /**
     * Writes {@code body} followed by a newline to the file {@code FileName}
     * inside the directory {@code path}, creating the directory if needed.
     * The text is encoded with the platform default charset.
     *
     * @param path     directory to write into
     * @param FileName name of the file to create or overwrite
     * @param body     text to write
     * @return {@code true} when the file was written without error,
     *         {@code false} otherwise (the stack trace is printed)
     */
    public static boolean scwj(String path, String FileName, String body) {
        PrintWriter out = null;
        try {
            File dir = new File(path);
            dir.mkdirs();
            out = new PrintWriter(new FileWriter(new File(dir, FileName)));
            out.print(body + "\n");
            // PrintWriter swallows IOExceptions; checkError() flushes and reports them.
            return !out.checkError();
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        } finally {
            if (out != null) {
                out.close();
            }
        }
    }

    /** Entry point: runs the list-page crawl in {@link #test2()}. */
    public static void main(String args[]) {
        OpenUrl.test2();
    }
}
// End of listing.
    
 
  |