博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
Scrapy Pipeline 练习
阅读量:5138 次
发布时间:2019-06-13

本文共 3001 字,大约阅读时间需要 10 分钟。

"""Scrapy item pipelines that print weather items or store them in Redis, MongoDB or MySQL.

The database drivers (``redis``, ``pymongo``, ``pymysql``) are imported inside
each pipeline's ``open_spider`` so that this module can be imported when only
some of the drivers are installed.
"""
import json


class WeatherPipeline(object):
    """Debug pipeline: print every item and pass it on unchanged."""

    def process_item(self, item, spider):
        """Print ``item`` to stdout and return it."""
        print(item)
        return item


# Insert into Redis
class RedisPipeline(object):
    """Store each item as a Redis hash whose key is the item's ``city`` field."""

    def __init__(self, host, port, password):
        self.host = host
        self.port = port
        self.password = password

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the ``RE_HOST``, ``RE_PORT`` and ``RE_PASS`` settings.

        ``RE_PORT`` defaults to ``'6379'`` and ``RE_PASS`` to ``'xxxxx'``.
        """
        return cls(
            host=crawler.settings.get('RE_HOST'),
            port=crawler.settings.get('RE_PORT', '6379'),
            password=crawler.settings.get('RE_PASS', 'xxxxx'),
        )

    def open_spider(self, spider):
        """Create a connection pool on database 3 and a client bound to it."""
        import redis  # deferred: only needed when this pipeline is enabled

        pool = redis.ConnectionPool(
            host=self.host,
            password=self.password,
            port=self.port,
            db=3,
        )
        self.client = redis.Redis(connection_pool=pool)

    def close_spider(self, spider):
        """Disconnect the pool's connections when the spider finishes."""
        # ``client`` is missing if open_spider never ran or failed part-way.
        client = getattr(self, 'client', None)
        if client is not None:
            client.connection_pool.disconnect()

    def process_item(self, item, spider):
        """Write all fields of ``item`` into the hash named ``item['city']``.

        Returns the item unchanged so later pipelines still receive it.
        """
        # NOTE: redis-py >= 3.5 deprecates hmset in favour of
        # hset(name, mapping=...); hmset is kept because it works on both
        # old and new client versions.
        # Alternative storage layouts: push json.dumps(dict(item)) onto a
        # list with lpush, or add it to a set with sadd.
        self.client.hmset(item['city'], dict(item))
        return item


# Insert into MongoDB
class MongoPipeline(object):
    """Insert each item as a document into the ``tianqi`` collection."""

    collection_name = 'tianqi'

    def __init__(self, mongo_host, mongo_db):
        self.mongo_host = mongo_host
        self.mongo_db = mongo_db

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the ``MO_HOST`` and ``MO_DB`` settings.

        ``MO_DB`` defaults to ``'weather'``.
        """
        return cls(
            mongo_host=crawler.settings.get('MO_HOST'),
            mongo_db=crawler.settings.get('MO_DB', 'weather'),
        )

    def open_spider(self, spider):
        """Connect to MongoDB and select the configured database."""
        import pymongo  # deferred: only needed when this pipeline is enabled

        self.client = pymongo.MongoClient(host=self.mongo_host)
        self.db = self.client[self.mongo_db]

    def close_spider(self, spider):
        """Close the MongoDB client."""
        self.client.close()

    def process_item(self, item, spider):
        """Insert a dict copy of ``item`` into the collection and return the item."""
        self.db[self.collection_name].insert_one(dict(item))
        return item


# Insert into the MySQL database
class MysqlPipeline(object):
    """Insert each item as one row of the ``weather`` table."""

    def __init__(self, host, username, password, database, port, charset):
        self.host = host
        self.username = username
        self.password = password
        self.database = database
        self.port = port
        self.charset = charset

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the ``MY_*`` settings.

        Reads ``MY_HOST``, ``MY_USER``, ``MY_PASS``, ``MY_DATA``, ``MY_PORT``
        and ``MY_CHARSET``. A ``MY_PORT`` that arrives as a string (for
        example from a command-line override) is converted to ``int``; an
        unset port stays ``None``.
        """
        settings = crawler.settings
        port = settings.get('MY_PORT')
        if port is not None:
            # PyMySQL rejects a port that is not an int.
            port = int(port)
        return cls(
            host=settings.get('MY_HOST'),
            username=settings.get('MY_USER'),
            password=settings.get('MY_PASS'),
            database=settings.get('MY_DATA'),
            port=port,
            charset=settings.get('MY_CHARSET'),
        )

    def open_spider(self, spider):
        """Open the MySQL connection and a cursor shared by all items."""
        import pymysql  # deferred: only needed when this pipeline is enabled

        self.client = pymysql.connect(
            host=self.host,
            user=self.username,
            password=self.password,
            database=self.database,
            port=self.port,
            charset=self.charset,
        )
        self.cursor = self.client.cursor()

    def close_spider(self, spider):
        """Close the cursor and then the connection.

        The connection is closed even if closing the cursor raises.
        """
        # Either attribute is missing if open_spider never ran or failed.
        cursor = getattr(self, 'cursor', None)
        client = getattr(self, 'client', None)
        try:
            if cursor is not None:
                cursor.close()
        finally:
            if client is not None:
                client.close()

    def process_item(self, item, spider):
        """Insert the item's four fields as a row and commit.

        On any failure of the insert or commit the transaction is rolled
        back and the exception is re-raised, so the shared connection is not
        left mid-transaction for the following items.
        """
        row = (item['sheng'], item['city'], item['hqiwen'], item['lqiwen'])
        try:
            self.cursor.execute(
                "INSERT INTO weather (`sheng`,`city`,`hqiwen`,`lqiwen`) VALUES (%s,%s,%s,%s)",
                row,
            )
            self.client.commit()
        except Exception:
            self.client.rollback()
            raise
        return item

 

转载于:https://www.cnblogs.com/leo0362/p/10796447.html

你可能感兴趣的文章
PNPoly算法代码例子,判断一个点是否在多边形里面
查看>>
gprc-java与golang分别实现服务端,客户端,跨语言通信(二.golang实现)
查看>>
获取当前类的位置以及方法名
查看>>
git小结
查看>>
leetcode做题中的一些小总结,很分散,待整理
查看>>
在centos6.8上源码安装MySQL
查看>>
GetDc函数与GetWindowDC函数的区别
查看>>
marshal intptr to delegate
查看>>
目前js比较流行的js框架
查看>>
C# 插入文本框到PPT幻灯片
查看>>
权限问题
查看>>
Python基础二
查看>>
Kindle Paperwhite 2使用体验
查看>>
touch
查看>>
Leetcode::Best Time to Buy and Sell Stock11
查看>>
POJ 1113 Wall 求凸包
查看>>
POJ 2981 Strange Way to Express Integers 模线性方程组
查看>>
母函数学习篇。
查看>>
redis有哪些功能
查看>>
程序员永远的痛之字符编码的奥秘
查看>>