mirror of https://github.com/Bunsly/JobSpy
updated the main flow to current
parent
f79a65929b
commit
92cf52d2b6
|
@ -24,7 +24,7 @@ async def main():
|
|||
telegramBot = TelegramBot()
|
||||
jobRepository = JobRepository()
|
||||
# sites_to_scrap = [Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI]
|
||||
sites_to_scrap = [Site.GLASSDOOR]
|
||||
sites_to_scrap = [Site.GOOZALI]
|
||||
for site in sites_to_scrap:
|
||||
jobs = scrape_jobs(
|
||||
site_name=[site],
|
||||
|
|
|
@ -6,18 +6,16 @@ This module contains routines to scrape Goozali.
|
|||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
import datetime
|
||||
import json
|
||||
|
||||
|
||||
from jobspy.scrapers import Scraper, ScraperInput
|
||||
from jobspy.scrapers.goozali.GoozaliMapper import GoozaliMapper
|
||||
from jobspy.scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent
|
||||
from jobspy.scrapers.goozali.model import GoozaliRow, GoozaliColumn, GoozaliResponse, GoozaliPartRequest, GoozaliFullRequest
|
||||
from jobspy.scrapers.goozali.model.GoozaliColumnChoice import GoozaliColumnChoice
|
||||
from jobspy.scrapers.goozali.constants import CHOICE_FIELD_KEY, extract_goozali_column_name, job_post_column_to_goozali_column
|
||||
from jobspy.scrapers.goozali.model import GoozaliColumn, GoozaliPartRequest, GoozaliFullRequest
|
||||
from jobspy.scrapers.site import Site
|
||||
|
||||
from ..utils import create_session, create_logger
|
||||
from ..utils import create_dict_by_key_and_value, create_session, create_logger
|
||||
from ...jobs import (
|
||||
JobPost,
|
||||
JobResponse,
|
||||
|
@ -82,13 +80,20 @@ class GoozaliScraper(Scraper):
|
|||
# suggestion: create a group-by field and then filter by hours
|
||||
# filter result by Field
|
||||
column = self.component.find_column(
|
||||
goozali_response.data.columns, "Field")
|
||||
goozali_response.data.columns, job_post_column_to_goozali_column["field"])
|
||||
column_choice = self.component.find_choice_from_column(
|
||||
column, "Software Engineering")
|
||||
column, CHOICE_FIELD_KEY)
|
||||
filtered_rows_by_column_choice = self.component.filter_rows_by_column_choice(
|
||||
goozali_response.data.rows, column, column_choice)
|
||||
filtered_rows_by_age_and_column_choice = self.component.filter_rows_by_hours(
|
||||
filtered_rows_by_column_choice, scraper_input.hours_old)
|
||||
dict_column_name_to_column: dict[str, GoozaliColumn] = create_dict_by_key_and_value(
|
||||
goozali_response.data.columns, extract_goozali_column_name)
|
||||
response: list[JobPost] = []
|
||||
# map each filtered row to a JobPost object
|
||||
for row in filtered_rows_by_age_and_column_choice:
|
||||
job_post = self.mapper.map_goozali_response_to_job_post(
|
||||
row, dict_column_name_to_column)
|
||||
response.append(job_post)
|
||||
|
||||
return JobResponse(jobs=job_list)
|
||||
return JobResponse(jobs=job_list)
|
||||
|
|
|
@ -13,6 +13,8 @@ job_post_column_to_goozali_column = {
|
|||
"id": "Job ID"
|
||||
}
|
||||
|
||||
CHOICE_FIELD_KEY = "Software Engineering"
|
||||
|
||||
job_post_column_names = ["id",
|
||||
"date_posted",
|
||||
"field",
|
||||
|
|
Loading…
Reference in New Issue