Python 爬虫简单实现,并在 Java 中调用 Python 脚本,将数据保存在 JSON 文件中


# coding:utf-8
"""Scrape a news list page and save its entries to ``data.json``.

The script downloads ``url``, reads every ``<li>`` inside the
``<ul class="w_newslistpage_list">`` element, turns each one into a
``dataBean`` (title, link, date), serializes the list as JSON, writes it to
``data.json`` in the current working directory and prints it.

It is written to run under Python 2.7 / Jython (``urllib2``) and also under
Python 3 (``urllib.request``).
"""
import io
import json
import sys
from contextlib import closing

try:
    from urllib2 import urlopen  # Python 2 / Jython
except ImportError:
    from urllib.request import urlopen  # Python 3

if sys.version_info[0] == 2:
    # Kept from the original script: override the Python 2 default encoding
    # so implicit str/unicode conversions (e.g. when printing) use UTF-8.
    reload(sys)  # noqa: F821 - builtin on Python 2 only
    sys.setdefaultencoding('utf-8')


class dataBean(object):
    """One news entry: its title, link target and publication date text."""

    def __init__(self, title, url, date):
        self.date = date
        self.url = url
        self.title = title

    def obj_2_json(obj):
        """Return the JSON-serializable dict form of a ``dataBean``.

        The first parameter is deliberately named ``obj``: the function is
        passed as ``dataBean.obj_2_json`` to ``json.dumps(default=...)``,
        which calls it with the object to convert, and it also works as a
        regular bound method (``bean.obj_2_json()``).
        """
        return {
            "title": obj.title,
            "url": obj.url,
            "date": obj.date
        }


# Page that is scraped when the script is run.
url = "http://localhost:8088/news.html"


def parse_news(html):
    """Extract one ``dataBean`` per ``<li>`` of the news list in *html*.

    :param html: raw page content as returned by ``response.read()``.
    :returns: list of ``dataBean`` in document order.
    :raises ValueError: if the page has no
        ``<ul class="w_newslistpage_list">`` element.
    """
    # Imported here so the bean/serialization helpers stay importable on
    # systems where bs4 is not installed.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html, 'html.parser', from_encoding='utf-8')
    news_list = soup.find('ul', class_="w_newslistpage_list")
    if news_list is None:
        raise ValueError(
            'no <ul class="w_newslistpage_list"> element found in the page')

    beans = []
    for item in news_list.find_all("li"):
        # The title link lives in the first <span>; look it up once and
        # reuse it for both the text and the href.
        link = item.find("span").find("a")
        date = item.find('span', class_="date")
        beans.append(dataBean(link.get_text(), link['href'], date.get_text()))
    return beans


def to_json(beans):
    """Serialize a list of ``dataBean`` to a JSON string (non-ASCII kept)."""
    return json.dumps(beans, default=dataBean.obj_2_json, ensure_ascii=False)


def save_json(payload, path='data.json'):
    """Write the JSON text *payload* to *path* encoded as UTF-8."""
    # On Python 2 json.dumps may return a byte string when everything is
    # ASCII; io.open in text mode only accepts unicode text.
    if isinstance(payload, bytes):
        payload = payload.decode('utf-8')
    with io.open(path, 'w', encoding='utf-8') as out:
        out.write(payload)


def main():
    """Download the page, store the extracted entries and print them."""
    with closing(urlopen(url)) as response:
        html = response.read()
    jsondata = to_json(parse_news(html))
    save_json(jsondata)
    print(jsondata)
    return jsondata


if __name__ == "__main__":
    main()

在 Java 中调用时,需借助 jython.jar,并将 bs4 文件夹拷贝到当前文件夹下即可:

import org.python.core.Py;import org.python.core.PyString;import org.python.util.PythonInterpreter;public class Main {//jython安装    public static void main(String[] args) {        String code = "# -*- coding: utf-8 -*-\n" +                "import sys\n" +                "reload(sys)\n" + "import urllib2\n" +                "sys.setdefaultencoding(‘utf-8‘)\n" +                "import json\n";        new Thread(new Runnable() {            @Override            public void run() {                PythonInterpreter interpreter = new PythonInterpreter();                interpreter.exec("from bs4 import BeautifulSoup");                PyString code2 = Py.newStringUTF8(code);                interpreter.exec(code2);                interpreter.execfile("D:\\java\\test\\src\\GetNewsDataToLocal.py");            }        }        ).start();    }}

运行后,可在当前文件夹下看到生成的 data.json 文件。

python爬虫简单实现,并在java中调用python脚本,将数据保存在json文件中

评论关闭