pAsynCrawler


License
MIT
Install
pip install pAsynCrawler==0.1.11

Documentation

pAsynCrawler

Package version

Installation

pip install pAsynCrawler

Features

  • Fetch data — asynchronously
  • Parse data — with multiprocessing

Example

examples

from bs4 import BeautifulSoup
from pAsynCrawler import AsynCrawler, flattener


def parser_0(response_text):
    """Extract menu labels and their links from a fetched HTML page.

    Parameters
    ----------
    response_text : str
        Raw HTML of the page returned by the crawler.

    Returns
    -------
    tuple
        ``(texts, hrefs)`` — two parallel tuples, one entry per anchor
        matched by the ``'ul > li > span > a'`` selector.
    """
    # Name the parser explicitly: omitting it raises bs4's
    # GuessedAtParserWarning and can pick different parsers (and thus
    # produce different trees) depending on what is installed.
    soup = BeautifulSoup(response_text, 'html.parser')
    menus = soup.select('ul > li > span > a')
    datas = tuple(x.text for x in menus)
    urls = tuple(x.attrs['href'] for x in menus)
    return (datas, urls)


def parser_1(response_text):
    """Extract second-level links from a fetched HTML page.

    NOTE(fix): this function was originally (mis)named ``parser_0``,
    duplicating the first parser and leaving ``parser_1`` — which the
    example's main block calls — undefined (NameError at runtime).

    Parameters
    ----------
    response_text : str
        Raw HTML of the page returned by the crawler.

    Returns
    -------
    tuple
        ``(texts, hrefs)`` — two parallel tuples, one entry per anchor
        matched by the ``'ul > li > a'`` selector.
    """
    # Explicit parser choice for deterministic, warning-free parsing.
    soup = BeautifulSoup(response_text, 'html.parser')
    menus = soup.select('ul > li > a')
    datas = tuple(x.text for x in menus)
    urls = tuple(x.attrs['href'] for x in menus)
    return (datas, urls)


if __name__ == '__main__':
    # 20 concurrent asynchronous fetches, 8 worker processes for parsing.
    crawler = AsynCrawler(asy_fetch=20, mp_parse=8)
    # First pass: scrape the seed page for menu texts and their URLs.
    menu_texts, menu_urls = crawler.fetch_and_parse(parser_0, ['https://www.example.com'])
    # Second pass: follow every collected URL (flattened into one list of URLs).
    sub_texts, sub_urls = crawler.fetch_and_parse(parser_1, flattener(menu_urls))