Library used by ByPrice


Keywords
ByPrice
License
MIT
Install
pip install ByHelpers==1.0.1

Documentation

ByHelpers

Repository created to be used by ByPrice's Scrapers as a library

Installation

$ pip install ByHelpers

APPLOGGER

Creates a logger instance used to send the scrapers' logs

Variables read from the environment

APP_NAME = os.getenv('APP_NAME', '__name__')
LOG_LEVEL = os.getenv('LOG_LEVEL', 'DEBUG')
LOG_HOST = os.getenv('LOG_HOST', 'logs5.papertrailapp.com')
LOG_PORT = os.getenv('LOG_PORT', 27971)
ENV = os.getenv('ENV', 'LOCAL')

HOW TO USE

# Import
from ByHelpers import applogger

# Create logger
applogger.create_logger()

# Instance logger
logger = applogger.get_logger()

# Examples
logger.info("Use this log every time you start a step")
logger.debug("Use this log to track important information for debugging")
logger.warning("Use this log to alert bad behaviors in the scraper")
logger.error("Use this log to declare errors that affect the scraper flow")

RABBIT_ENGINE

Allows you to import two different streamers to send messages to RabbitMQ

Variables read from the environment

APP_NAME = os.getenv('APP_NAME', '__name__')
LOG_LEVEL = os.getenv('LOG_LEVEL', 'DEBUG')
STREAMER_ROUTING_KEY = os.getenv('STREAMER_ROUTING_KEY', 'routing')
SMONITOR_KEY = os.getenv('SMONITOR','smonitor')
ENV = os.getenv('ENV', 'LOCAL')

# Required!!
RETAILER_KEY = os.getenv('RETAILER_KEY')
SCRAPER_TYPE = os.getenv('SCRAPER_TYPE')

HOW TO USE

# Import
from ByHelpers.rabbit_engine import stream_monitor, stream_info
from ByRequests.ByRequests import ByRequest

# MASTER ITEM OR PRICE
# required params:
#  - params (dict)
#  - num_stores (integer)

ms_id = stream_monitor('master', params={"dict": "with params"}, num_stores=1)



# MASTER STORE
ms_id = stream_monitor('master')



# WORKER
# required params:
#  - store_id (str)
#  - ms_id (str)
#  - step (str)
### Possible steps
### "start", "category", "pagination", "price", "item", "store"
### "price_details", "item_details", "store_details", "cookies"

# optional params
#  - value (integer)
#  - br_stats (ByRequest().stats)



br = ByRequest()
ws_id = stream_monitor('worker', store_id='123', ms_id=ms_id, step='category', value=0, br_stats=br.stats)

# ERROR
# required params:
#  - ws_id (str) # IF THE ERROR OCCURS IN THE WORKER
#  - ms_id (str) # IF THE ERROR OCCURS IN THE MASTER
#  - code (integer)
#  - reason (str)
### Possible codes
### 0 -> No response from ByRequest
### 1 -> Empty
### 2 -> Undefined

# Error in master
es_id = stream_monitor('error', ms_id=ms_id, code=1, reason='The list was empty')

# Error in worker
es_id = stream_monitor('error', ws_id=ws_id, code=0, reason='Cannot get soup')

# Error before master
es_id = stream_monitor('error', code=2, reason='Unknown')

FUNCTIONS

Some functions that will help you with the scrapers

create_attribute(name='', clss_name='', value='', attr_key='', clss_key='')

This function helps you with the format of the attributes

# INGREDIENT
create_attribute(
    name='SOME INGREDIENT',
    clss_name='Ingrediente',
    value=None,
    attr_key='ingrediente',
    clss_key='ingredient'
)

# CATEGORY
create_attribute(
name='SOME CATEGORY',
clss_name='Categoría',
value=1, # DEPTH (index + 1)
attr_key='categoria',
clss_key='category')

# UNIT
create_attribute(
name="SOME UNIT", # Ex. (L, MG, G)
clss_name='Unidad',
value=100, # QUANTITY
attr_key='unidad',
clss_key='unit')

# LABORATORY
create_attribute(
name="SOME LABORATORY",
clss_name='Laboratorio',
value=None,
attr_key='laboratorio',
clss_key='laboratory')


# PRESENTATION
create_attribute(
name="SOME PRESENTATION",
clss_name='Presentación',
value="SOME VALUE", # EX. pastillas/piezas
attr_key='presentacion',
clss_key='presentation')


# CONTAINER TYPE
create_attribute(
name="SOME CONTAINER", # Ex. (bolsa, caja, atomizador)
clss_name='Contenedor',
value=None,
attr_key='contenedor',
clss_key='container')

key_format(data)

This function converts a string into a key (without accents or special characters)

key_format("This is an Increíble example in Español!!! :D")

clean_list(list_, remove_duplicates=True)

This function removes empty strings from a list and can optionally drop duplicates

clean_list(["This", "", "\n", "is", "an", "example", "example", "!"])