""" jobspy.scrapers.Goozali ~~~~~~~~~~~~~~~~~~~ This module contains routines to scrape Goozali. """ from __future__ import annotations import datetime import json from jobspy.scrapers import Scraper, ScraperInput from jobspy.scrapers.goozali.GoozaliMapper import GoozaliMapper from jobspy.scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent from jobspy.scrapers.goozali.model import GoozaliRow, GoozaliColumn, GoozaliResponse, GoozaliPartRequest, GoozaliFullRequest from jobspy.scrapers.goozali.model.GoozaliColumnChoice import GoozaliColumnChoice from jobspy.scrapers.site import Site from ..utils import create_session, create_logger from .constants import headers from ...jobs import ( JobPost, JobResponse, ) logger = create_logger("Goozali") class GoozaliScraper(Scraper): delay = 3 band_delay = 4 jobs_per_page = 25 def __init__( self, proxies: list[str] | str | None = None, ca_cert: str | None = None ): """ Initializes GoozaliScraper with the Goozalijob search url """ super().__init__(site=Site.GOOZALI, proxies=proxies, ca_cert=ca_cert) self.session = create_session( proxies=self.proxies, ca_cert=ca_cert, is_tls=False, has_retry=True, delay=5, clear_cookies=False, ) self.mapper = GoozaliMapper() self.base_url = "https://airtable.com/v0.3/view/{view_id}/readSharedViewData" self.view_ids = ["viwIOzPYaUGxlA0Jd"] self.component = GoozaliScrapperComponent() def scrape(self, scraper_input: ScraperInput) -> JobResponse: """ Scrapes Goozali for jobs with scraper_input criteria :param scraper_input: :return: job_response """ self.scraper_input = scraper_input job_list: list[JobPost] = [] seen_ids = set() for view_id in self.view_ids: full_request = GoozaliFullRequest(self.base_url) part_request = GoozaliPartRequest(self.base_url) try: response = self.session.get( url=full_request.url, params=full_request.params, timeout=10, headers=full_request.headers, cookies=full_request.cookies) logger.info(f"response: {str(response)}") if (response.status_code != 200): logger.error(f"Status code: {response.status_code}, Error: { str(response.text)}") return JobResponse(jobs=job_list) except Exception as e: logger.error(f"Exception: {str(e)}") # model the response with models goozali_response = self.mapper.map_response_to_goozali_response( response=response) # suggestL create groupby field and then filter by hours # filter result by Field like the web field_cloumn = self.component.find_column( goozali_response.data.columns, "Field") software_engineering_choice = self.component.find_choice_from_column( field_cloumn, "Software Engineering") # filter by date filtered_rows_by_age = self.component.filter_rows_by_hours( goozali_response.data.rows, scraper_input.hours_old) # map to JobResponse Object return JobResponse(jobs=job_list)