aiocutter

aiocutter is a scraping tool for asyncio.

Usage

from aiocutter import aiocutter

# Page at which scraping starts.
url = "https://github.com/python/pythondotorg/issues"
cutter = aiocutter.AioCutter()
# GithubIssue is the Scrap subclass holding the scraping rules (defined below).
result = cutter.run(url, GithubIssue)

Write the scraping rules in a class that inherits from Scrap.

from aiocutter.scrap import Scrap


class GithubIssue(Scrap):
    """Scraping rule for the entries of a GitHub issue list.

    An instance holds the title of a single issue. The classmethods describe
    how to locate the issue elements on a page, how to find the following
    page, and how to build an instance from one element.

    NOTE(review): the ``html`` arguments are used through a BeautifulSoup-style
    API (``find_all``, ``select``, ``find_next_siblings``) -- confirm against
    the ``Scrap`` base class.
    """

    def __init__(self, title):
        """Store the issue title."""
        super().__init__()
        self.title = title

    @classmethod
    def get_iterator(cls, url, html):
        """Return every ``<a class="issue-title-link">`` element in *html*."""
        return html.find_all("a", class_="issue-title-link")

    @classmethod
    def get_next_url(cls, url, html):
        """Return the absolute URL of the next page, or ``""`` if there is none.

        The next page is taken from the first ``<a>`` sibling that follows the
        ``em.current`` element, and only when that link's text is a number.
        """
        current = html.select("em.current")
        if not current:
            # No current-page marker on this page: report "no next page"
            # instead of failing with IndexError on the empty result.
            return ""

        next_links = current[0].find_next_siblings("a")
        if not next_links:
            return ""

        candidate = next_links[0]
        label = candidate.string
        # Follow the link only when its text is a page number.
        if label and label.isdigit():
            return cls._make_abs_url(url, candidate["href"])

        return ""

    @classmethod
    def create(cls, url, html):
        """Build an instance whose title is the text of the element *html*."""
        title = cls._get_element_text(html)
        # Use cls rather than a hard-coded class name so subclasses get
        # instances of their own type.
        return cls(title)

    def to_line(self):
        """Return the fields of this issue as one tab-separated line."""
        return "\t".join([self.title])

    def __str__(self):
        return self.to_line()

Install

pip install aiocutter

aiocutter
Release 0.0.2

Release 0.0.2

0.0.4

0.0.3

0.0.2

0.0.1

Documentation

aiocutter

Usage

Install

Stats

Development practices

Releases

Contributors

aiocutter Release 0.0.2

Release 0.0.2 Toggle Dropdown 0.0.4 0.0.3 0.0.2 0.0.1

Documentation

aiocutter

Usage

Install

Stats

Development practices

Releases

Contributors

aiocutter
Release 0.0.2

Release 0.0.2

0.0.4

0.0.3

0.0.2

0.0.1