Import problem when getting the field choice from main; class names aligned

pull/231/head
Yariv Menachem 2024-12-22 17:53:04 +02:00
parent cbe3a97a73
commit b55287b5ec
14 changed files with 86 additions and 70 deletions

View File

@ -1,6 +1,4 @@
from __future__ import annotations
from datetime import datetime
from enum import Enum
import pandas as pd
from typing import Tuple
@ -90,7 +88,6 @@ def scrape_jobs(
return site_types
country_enum = Country.from_string(country_indeed)
scraper_input = ScraperInput(
site_type=get_site_type(),
country=country_enum,
@ -107,7 +104,7 @@ def scrape_jobs(
results_wanted=results_wanted,
linkedin_company_ids=linkedin_company_ids,
offset=offset,
hours_old=hours_old,
hours_old=hours_old
)
def scrape_site(site: Site) -> Tuple[str, JobResponse]:

View File

@ -35,7 +35,7 @@ async def main():
"Central, Israel", "Rehovot ,Israel"],
results_wanted=200,
hours_old=200,
country_indeed='israel',
country_indeed='israel'
)
logger.info(f"Found {len(jobs)} jobs")
jobs = list(filter(filter_jobs_by_title_name, jobs))

View File

@ -4,7 +4,6 @@ from abc import ABC, abstractmethod
from jobspy.scrapers.site import Site
from ..jobs import (
Enum,
BaseModel,

View File

@ -2,10 +2,7 @@ from datetime import datetime
import json
from jobspy.jobs import JobPost, Location
from jobspy.scrapers.goozali.model import GoozaliColumnTypeOptions, GoozaliResponse, GoozaliRow
from jobspy.scrapers.goozali.model.GoozaliColumn import GoozaliColumn
from jobspy.scrapers.goozali.model.GoozaliColumnChoice import GoozaliColumnChoice
from jobspy.scrapers.goozali.model.GozaaliResponseData import GoozaliResponseData
from jobspy.scrapers.goozali.model import GoozaliColumnTypeOptions, GoozaliResponse, GoozaliRow, GoozaliColumn, GoozaliColumnChoice, GoozaliResponseData
from .constants import job_post_column_to_goozali_column, job_post_column_names
# Mapping function to convert parsed dictionary into GoozaliResponseData

View File

@ -1,8 +1,6 @@
from datetime import datetime, timedelta
from jobspy.scrapers.goozali.model import GoozaliRow
from jobspy.scrapers.goozali.model.GoozaliColumn import GoozaliColumn
from jobspy.scrapers.goozali.model.GoozaliColumnChoice import GoozaliColumnChoice
from jobspy.scrapers.goozali.model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice
from jobspy.scrapers.utils import create_logger
# Mapping function to convert parsed dictionary into GoozaliResponseData

View File

@ -8,11 +8,11 @@ This module contains routines to scrape Goozali.
from __future__ import annotations
from jobspy.scrapers import Scraper, ScraperInput
from .. import Scraper, ScraperInput
from jobspy.scrapers.goozali.GoozaliMapper import GoozaliMapper
from jobspy.scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent
from jobspy.scrapers.goozali.constants import CHOICE_FIELD_KEY, extract_goozali_column_name, job_post_column_to_goozali_column
from jobspy.scrapers.goozali.model import GoozaliColumn, GoozaliPartRequest, GoozaliFullRequest
from jobspy.scrapers.goozali.constants import extract_goozali_column_name, job_post_column_to_goozali_column
from jobspy.scrapers.goozali.model import GoozaliColumn, GoozaliFieldChoice, GoozaliPartRequest, GoozaliFullRequest
from jobspy.scrapers.site import Site
from ..utils import create_dict_by_key_and_value, create_session, create_logger
@ -45,7 +45,6 @@ class GoozaliScraper(Scraper):
)
self.mapper = GoozaliMapper()
self.base_url = "https://airtable.com/v0.3/view/{view_id}/readSharedViewData"
self.view_ids = ["viwIOzPYaUGxlA0Jd"]
self.component = GoozaliScrapperComponent()
def scrape(self, scraper_input: ScraperInput) -> JobResponse:
@ -56,8 +55,6 @@ class GoozaliScraper(Scraper):
"""
self.scraper_input = scraper_input
job_list: list[JobPost] = []
seen_ids = set()
for view_id in self.view_ids:
full_request = GoozaliFullRequest(self.base_url)
part_request = GoozaliPartRequest(self.base_url)
try:
@ -82,7 +79,7 @@ class GoozaliScraper(Scraper):
column = self.component.find_column(
goozali_response.data.columns, job_post_column_to_goozali_column["field"])
column_choice = self.component.find_choice_from_column(
column, CHOICE_FIELD_KEY)
column, GoozaliFieldChoice.SOFTWARE_ENGINEERING.value)
filtered_rows_by_column_choice = self.component.filter_rows_by_column_choice(
goozali_response.data.rows, column, column_choice)
filtered_rows_by_age_and_column_choice = self.component.filter_rows_by_hours(

View File

@ -13,8 +13,6 @@ job_post_column_to_goozali_column = {
"id": "Job ID"
}
CHOICE_FIELD_KEY = "Software Engineering"
job_post_column_names = ["id",
"date_posted",
"field",

View File

@ -1,4 +1,4 @@
from jobspy.scrapers.goozali.model.GoozaliColumnChoice import GoozaliColumnChoice
from jobspy.scrapers.goozali.model import GoozaliColumnChoice
class GoozaliColumnTypeOptions:

View File

@ -0,0 +1,31 @@
from enum import Enum
class GoozaliFieldChoice(Enum):
    """Closed set of job-field choice names used by the Goozali scraper.

    Each member's value is the human-readable choice name as a string.
    Use ``.value`` when a plain string is required, e.g. when looking a
    choice up by name in a Goozali column.
    """
    # NOTE(review): values presumably mirror the choice names of the Goozali
    # "field" column verbatim (case and punctuation) — confirm against the
    # live table before editing any of them.
    PRODUCT_MANAGEMENT = "Product Management"
    DATA_ANALYST = "Data Analyst"
    DATA_SCIENCE_ML_ALGORITHMS = "Data Science, ML & Algorithms"
    SOFTWARE_ENGINEERING = "Software Engineering"
    QA = "QA"
    CYBERSECURITY = "Cybersecurity"
    IT_AND_SYSTEM_ADMINISTRATION = "IT and System Administration"
    FRONTEND_DEVELOPMENT = "Frontend Development"
    DEVOPS = "DevOps"
    UI_UX_DESIGN_CONTENT = "UI/UX, Design & Content"
    HR_RECRUITMENT = "HR & Recruitment"
    MOBILE_DEVELOPMENT = "Mobile Development"
    HARDWARE_ENGINEERING = "Hardware Engineering"
    EMBEDDED_LOW_LEVEL_FIRMWARE_ENGINEERING = "Embedded, Low Level & Firmware Engineering"
    CUSTOMER_SUCCESS = "Customer Success"
    PROJECT_MANAGEMENT = "Project Management"
    OPERATIONS = "Operations"
    FINANCE = "Finance"
    SYSTEMS_ENGINEERING = "Systems Engineering"
    MARKETING = "Marketing"
    SALES = "Sales"
    COMPLIANCE_LEGAL_POLICY = "Compliance, Legal & Policy"
    C_LEVEL = "C-Level"
    BUSINESS_DEVELOPMENT = "Business Development"
    MECHANICAL_ENGINEERING = "Mechanical Engineering"
    NATURAL_SCIENCE = "Natural Science"
    OTHER = "Other"

View File

@ -1,4 +1,4 @@
from jobspy.scrapers.goozali.model.GozaaliResponseData import GoozaliResponseData
from jobspy.scrapers.goozali.model import GoozaliResponseData
class GoozaliResponse:

View File

@ -1,5 +1,4 @@
from jobspy.scrapers.goozali.model import GoozaliRow
from jobspy.scrapers.goozali.model.GoozaliColumn import GoozaliColumn
from jobspy.scrapers.goozali.model import GoozaliRow, GoozaliColumn
class GoozaliResponseData:

View File

@ -2,5 +2,8 @@ from .GoozaliRow import GoozaliRow
from .GoozaliResponse import GoozaliResponse
from .GoozaliColumn import GoozaliColumn
from .GoozaliPartRequest import GoozaliPartRequest
from .FullRequest import GoozaliFullRequest
from .GoozaliFullRequest import GoozaliFullRequest
from .GoozaliColumnTypeOptions import GoozaliColumnTypeOptions
from .GoozaliFieldChoice import GoozaliFieldChoice
from .GoozaliResponseData import GoozaliResponseData
from .GoozaliColumnChoice import GoozaliColumnChoice

View File

@ -1,14 +1,11 @@
import json
import os
from jobspy import scrape_jobs
import pandas as pd
from jobspy.jobs import JobPost
from jobspy.scrapers.goozali.GoozaliMapper import GoozaliMapper
from jobspy.scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent
from jobspy.scrapers.goozali.constants import CHOICE_FIELD_KEY, extract_goozali_column_name, job_post_column_to_goozali_column
from jobspy.scrapers.goozali.model import GoozaliColumn
from jobspy.scrapers.goozali.model.GozaaliResponseData import GoozaliResponseData
from jobspy.scrapers.goozali.constants import extract_goozali_column_name, job_post_column_to_goozali_column
from jobspy.scrapers.goozali.model import GoozaliColumn, GoozaliFieldChoice, GoozaliResponseData
from jobspy.scrapers.utils import create_dict_by_key_and_value
# URL Example
# https://airtable.com/v0.3/view/viwagEIbkfz2iMsLU/readSharedViewData?stringifiedObjectParams=%7B%22shouldUseNestedResponseFormat%22%3Atrue%7D&requestId=reqXyRSHWlXyiRgY9&accessPolicy=%7B%22allowedActions%22%3A%5B%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22readSharedViewData%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22getMetadataForPrinting%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22readSignedAttachmentUrls%22%7D%2C%7B%22modelClassName%22%3A%22row%22%2C%22modelIdSelector%22%3A%22rows%20*%5BdisplayedInView%3DviwagEIbkfz2iMsLU%5D%22%2C%22action%22%3A%22createDocumentPreviewSession%22%7D%5D%2C%22shareId%22%3A%22shr97tl6luEk4Ca9R%22%2C%22applicationId%22%3A%22app5sYJyDgcRbJWYU%22%2C%22generationNumber%22%3A0%2C%22expires%22%3A%222025-01-02T00%3A00%3A00.000Z%22%2C%22signature%22%3A%223aa292ee44d15aa75d9506200329e413653471f89e000fa370ef9fa38393070a%22%7D
@ -31,7 +28,7 @@ try:
column = component.find_column(
response_data.columns, job_post_column_to_goozali_column["field"])
column_choice = component.find_choice_from_column(
column, CHOICE_FIELD_KEY)
column, GoozaliFieldChoice.SOFTWARE_ENGINEERING)
filtered_rows_by_column_choice = component.filter_rows_by_column_choice(
response_data.rows, column, column_choice)