Import problem when getting the field choice from main, but class names are now aligned

pull/231/head
Yariv Menachem 2024-12-22 17:53:04 +02:00
parent cbe3a97a73
commit b55287b5ec
14 changed files with 86 additions and 70 deletions

View File

@ -1,6 +1,4 @@
from __future__ import annotations from __future__ import annotations
from datetime import datetime
from enum import Enum
import pandas as pd import pandas as pd
from typing import Tuple from typing import Tuple
@ -90,7 +88,6 @@ def scrape_jobs(
return site_types return site_types
country_enum = Country.from_string(country_indeed) country_enum = Country.from_string(country_indeed)
scraper_input = ScraperInput( scraper_input = ScraperInput(
site_type=get_site_type(), site_type=get_site_type(),
country=country_enum, country=country_enum,
@ -107,7 +104,7 @@ def scrape_jobs(
results_wanted=results_wanted, results_wanted=results_wanted,
linkedin_company_ids=linkedin_company_ids, linkedin_company_ids=linkedin_company_ids,
offset=offset, offset=offset,
hours_old=hours_old, hours_old=hours_old
) )
def scrape_site(site: Site) -> Tuple[str, JobResponse]: def scrape_site(site: Site) -> Tuple[str, JobResponse]:

View File

@ -35,7 +35,7 @@ async def main():
"Central, Israel", "Rehovot ,Israel"], "Central, Israel", "Rehovot ,Israel"],
results_wanted=200, results_wanted=200,
hours_old=200, hours_old=200,
country_indeed='israel', country_indeed='israel'
) )
logger.info(f"Found {len(jobs)} jobs") logger.info(f"Found {len(jobs)} jobs")
jobs = list(filter(filter_jobs_by_title_name, jobs)) jobs = list(filter(filter_jobs_by_title_name, jobs))

View File

@ -4,7 +4,6 @@ from abc import ABC, abstractmethod
from jobspy.scrapers.site import Site from jobspy.scrapers.site import Site
from ..jobs import ( from ..jobs import (
Enum, Enum,
BaseModel, BaseModel,

View File

@ -2,10 +2,7 @@ from datetime import datetime
import json import json
from jobspy.jobs import JobPost, Location from jobspy.jobs import JobPost, Location
from jobspy.scrapers.goozali.model import GoozaliColumnTypeOptions, GoozaliResponse, GoozaliRow from jobspy.scrapers.goozali.model import GoozaliColumnTypeOptions, GoozaliResponse, GoozaliRow, GoozaliColumn, GoozaliColumnChoice, GoozaliResponseData
from jobspy.scrapers.goozali.model.GoozaliColumn import GoozaliColumn
from jobspy.scrapers.goozali.model.GoozaliColumnChoice import GoozaliColumnChoice
from jobspy.scrapers.goozali.model.GozaaliResponseData import GoozaliResponseData
from .constants import job_post_column_to_goozali_column, job_post_column_names from .constants import job_post_column_to_goozali_column, job_post_column_names
# Mapping function to convert parsed dictionary into GoozaliResponseData # Mapping function to convert parsed dictionary into GoozaliResponseData

View File

@ -1,8 +1,6 @@
from datetime import datetime, timedelta from datetime import datetime, timedelta
from jobspy.scrapers.goozali.model import GoozaliRow from jobspy.scrapers.goozali.model import GoozaliRow, GoozaliColumn, GoozaliColumnChoice
from jobspy.scrapers.goozali.model.GoozaliColumn import GoozaliColumn
from jobspy.scrapers.goozali.model.GoozaliColumnChoice import GoozaliColumnChoice
from jobspy.scrapers.utils import create_logger from jobspy.scrapers.utils import create_logger
# Mapping function to convert parsed dictionary into GoozaliResponseData # Mapping function to convert parsed dictionary into GoozaliResponseData

View File

@ -8,11 +8,11 @@ This module contains routines to scrape Goozali.
from __future__ import annotations from __future__ import annotations
from jobspy.scrapers import Scraper, ScraperInput from .. import Scraper, ScraperInput
from jobspy.scrapers.goozali.GoozaliMapper import GoozaliMapper from jobspy.scrapers.goozali.GoozaliMapper import GoozaliMapper
from jobspy.scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent from jobspy.scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent
from jobspy.scrapers.goozali.constants import CHOICE_FIELD_KEY, extract_goozali_column_name, job_post_column_to_goozali_column from jobspy.scrapers.goozali.constants import extract_goozali_column_name, job_post_column_to_goozali_column
from jobspy.scrapers.goozali.model import GoozaliColumn, GoozaliPartRequest, GoozaliFullRequest from jobspy.scrapers.goozali.model import GoozaliColumn, GoozaliFieldChoice, GoozaliPartRequest, GoozaliFullRequest
from jobspy.scrapers.site import Site from jobspy.scrapers.site import Site
from ..utils import create_dict_by_key_and_value, create_session, create_logger from ..utils import create_dict_by_key_and_value, create_session, create_logger
@ -45,7 +45,6 @@ class GoozaliScraper(Scraper):
) )
self.mapper = GoozaliMapper() self.mapper = GoozaliMapper()
self.base_url = "https://airtable.com/v0.3/view/{view_id}/readSharedViewData" self.base_url = "https://airtable.com/v0.3/view/{view_id}/readSharedViewData"
self.view_ids = ["viwIOzPYaUGxlA0Jd"]
self.component = GoozaliScrapperComponent() self.component = GoozaliScrapperComponent()
def scrape(self, scraper_input: ScraperInput) -> JobResponse: def scrape(self, scraper_input: ScraperInput) -> JobResponse:
@ -56,8 +55,6 @@ class GoozaliScraper(Scraper):
""" """
self.scraper_input = scraper_input self.scraper_input = scraper_input
job_list: list[JobPost] = [] job_list: list[JobPost] = []
seen_ids = set()
for view_id in self.view_ids:
full_request = GoozaliFullRequest(self.base_url) full_request = GoozaliFullRequest(self.base_url)
part_request = GoozaliPartRequest(self.base_url) part_request = GoozaliPartRequest(self.base_url)
try: try:
@ -82,7 +79,7 @@ class GoozaliScraper(Scraper):
column = self.component.find_column( column = self.component.find_column(
goozali_response.data.columns, job_post_column_to_goozali_column["field"]) goozali_response.data.columns, job_post_column_to_goozali_column["field"])
column_choice = self.component.find_choice_from_column( column_choice = self.component.find_choice_from_column(
column, CHOICE_FIELD_KEY) column, GoozaliFieldChoice.SOFTWARE_ENGINEERING.value)
filtered_rows_by_column_choice = self.component.filter_rows_by_column_choice( filtered_rows_by_column_choice = self.component.filter_rows_by_column_choice(
goozali_response.data.rows, column, column_choice) goozali_response.data.rows, column, column_choice)
filtered_rows_by_age_and_column_choice = self.component.filter_rows_by_hours( filtered_rows_by_age_and_column_choice = self.component.filter_rows_by_hours(

View File

@ -13,8 +13,6 @@ job_post_column_to_goozali_column = {
"id": "Job ID" "id": "Job ID"
} }
CHOICE_FIELD_KEY = "Software Engineering"
job_post_column_names = ["id", job_post_column_names = ["id",
"date_posted", "date_posted",
"field", "field",

View File

@ -1,4 +1,4 @@
from jobspy.scrapers.goozali.model.GoozaliColumnChoice import GoozaliColumnChoice from jobspy.scrapers.goozali.model import GoozaliColumnChoice
class GoozaliColumnTypeOptions: class GoozaliColumnTypeOptions:

View File

@ -0,0 +1,31 @@
from enum import Enum, unique


# @unique makes a duplicated label fail loudly at import time; a plain Enum
# would silently turn the second member into an alias of the first.
@unique
class GoozaliFieldChoice(Enum):
    """Named choices for the Goozali job "field" column.

    Each member's ``value`` is the human-readable choice label. Callers that
    need the raw string (for example when looking a choice up by name in a
    column) should pass ``member.value`` rather than the member itself.

    NOTE(review): the labels are presumably expected to match the choice
    names in the Goozali Airtable view character-for-character -- confirm
    against live data before editing any of them.
    """

    PRODUCT_MANAGEMENT = "Product Management"
    DATA_ANALYST = "Data Analyst"
    DATA_SCIENCE_ML_ALGORITHMS = "Data Science, ML & Algorithms"
    SOFTWARE_ENGINEERING = "Software Engineering"
    QA = "QA"
    CYBERSECURITY = "Cybersecurity"
    IT_AND_SYSTEM_ADMINISTRATION = "IT and System Administration"
    FRONTEND_DEVELOPMENT = "Frontend Development"
    DEVOPS = "DevOps"
    UI_UX_DESIGN_CONTENT = "UI/UX, Design & Content"
    HR_RECRUITMENT = "HR & Recruitment"
    MOBILE_DEVELOPMENT = "Mobile Development"
    HARDWARE_ENGINEERING = "Hardware Engineering"
    EMBEDDED_LOW_LEVEL_FIRMWARE_ENGINEERING = "Embedded, Low Level & Firmware Engineering"
    CUSTOMER_SUCCESS = "Customer Success"
    PROJECT_MANAGEMENT = "Project Management"
    OPERATIONS = "Operations"
    FINANCE = "Finance"
    SYSTEMS_ENGINEERING = "Systems Engineering"
    MARKETING = "Marketing"
    SALES = "Sales"
    COMPLIANCE_LEGAL_POLICY = "Compliance, Legal & Policy"
    C_LEVEL = "C-Level"
    BUSINESS_DEVELOPMENT = "Business Development"
    MECHANICAL_ENGINEERING = "Mechanical Engineering"
    NATURAL_SCIENCE = "Natural Science"
    OTHER = "Other"

View File

@ -1,4 +1,4 @@
from jobspy.scrapers.goozali.model.GozaaliResponseData import GoozaliResponseData from jobspy.scrapers.goozali.model import GoozaliResponseData
class GoozaliResponse: class GoozaliResponse:

View File

@ -1,5 +1,4 @@
from jobspy.scrapers.goozali.model import GoozaliRow from jobspy.scrapers.goozali.model import GoozaliRow, GoozaliColumn
from jobspy.scrapers.goozali.model.GoozaliColumn import GoozaliColumn
class GoozaliResponseData: class GoozaliResponseData:

View File

@ -2,5 +2,8 @@ from .GoozaliRow import GoozaliRow
from .GoozaliResponse import GoozaliResponse from .GoozaliResponse import GoozaliResponse
from .GoozaliColumn import GoozaliColumn from .GoozaliColumn import GoozaliColumn
from .GoozaliPartRequest import GoozaliPartRequest from .GoozaliPartRequest import GoozaliPartRequest
from .FullRequest import GoozaliFullRequest from .GoozaliFullRequest import GoozaliFullRequest
from .GoozaliColumnTypeOptions import GoozaliColumnTypeOptions from .GoozaliColumnTypeOptions import GoozaliColumnTypeOptions
from .GoozaliFieldChoice import GoozaliFieldChoice
from .GoozaliResponseData import GoozaliResponseData
from .GoozaliColumnChoice import GoozaliColumnChoice

View File

@ -1,14 +1,11 @@
import json import json
import os import os
from jobspy import scrape_jobs
import pandas as pd
from jobspy.jobs import JobPost from jobspy.jobs import JobPost
from jobspy.scrapers.goozali.GoozaliMapper import GoozaliMapper from jobspy.scrapers.goozali.GoozaliMapper import GoozaliMapper
from jobspy.scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent from jobspy.scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent
from jobspy.scrapers.goozali.constants import CHOICE_FIELD_KEY, extract_goozali_column_name, job_post_column_to_goozali_column from jobspy.scrapers.goozali.constants import extract_goozali_column_name, job_post_column_to_goozali_column
from jobspy.scrapers.goozali.model import GoozaliColumn from jobspy.scrapers.goozali.model import GoozaliColumn, GoozaliFieldChoice, GoozaliResponseData
from jobspy.scrapers.goozali.model.GozaaliResponseData import GoozaliResponseData
from jobspy.scrapers.utils import create_dict_by_key_and_value from jobspy.scrapers.utils import create_dict_by_key_and_value
# URL Example # URL Example
# https://airtable.com/v0.3/view/viwagEIbkfz2iMsLU/readSharedViewData?stringifiedObjectParams=%7B%22shouldUseNestedResponseFormat%22%3Atrue%7D&requestId=reqXyRSHWlXyiRgY9&accessPolicy=%7B%22allowedActions%22%3A%5B%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22readSharedViewData%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22getMetadataForPrinting%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22readSignedAttachmentUrls%22%7D%2C%7B%22modelClassName%22%3A%22row%22%2C%22modelIdSelector%22%3A%22rows%20*%5BdisplayedInView%3DviwagEIbkfz2iMsLU%5D%22%2C%22action%22%3A%22createDocumentPreviewSession%22%7D%5D%2C%22shareId%22%3A%22shr97tl6luEk4Ca9R%22%2C%22applicationId%22%3A%22app5sYJyDgcRbJWYU%22%2C%22generationNumber%22%3A0%2C%22expires%22%3A%222025-01-02T00%3A00%3A00.000Z%22%2C%22signature%22%3A%223aa292ee44d15aa75d9506200329e413653471f89e000fa370ef9fa38393070a%22%7D # 
https://airtable.com/v0.3/view/viwagEIbkfz2iMsLU/readSharedViewData?stringifiedObjectParams=%7B%22shouldUseNestedResponseFormat%22%3Atrue%7D&requestId=reqXyRSHWlXyiRgY9&accessPolicy=%7B%22allowedActions%22%3A%5B%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22readSharedViewData%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22getMetadataForPrinting%22%7D%2C%7B%22modelClassName%22%3A%22view%22%2C%22modelIdSelector%22%3A%22viwagEIbkfz2iMsLU%22%2C%22action%22%3A%22readSignedAttachmentUrls%22%7D%2C%7B%22modelClassName%22%3A%22row%22%2C%22modelIdSelector%22%3A%22rows%20*%5BdisplayedInView%3DviwagEIbkfz2iMsLU%5D%22%2C%22action%22%3A%22createDocumentPreviewSession%22%7D%5D%2C%22shareId%22%3A%22shr97tl6luEk4Ca9R%22%2C%22applicationId%22%3A%22app5sYJyDgcRbJWYU%22%2C%22generationNumber%22%3A0%2C%22expires%22%3A%222025-01-02T00%3A00%3A00.000Z%22%2C%22signature%22%3A%223aa292ee44d15aa75d9506200329e413653471f89e000fa370ef9fa38393070a%22%7D
@ -31,7 +28,7 @@ try:
column = component.find_column( column = component.find_column(
response_data.columns, job_post_column_to_goozali_column["field"]) response_data.columns, job_post_column_to_goozali_column["field"])
column_choice = component.find_choice_from_column( column_choice = component.find_choice_from_column(
column, CHOICE_FIELD_KEY) column, GoozaliFieldChoice.SOFTWARE_ENGINEERING)
filtered_rows_by_column_choice = component.filter_rows_by_column_choice( filtered_rows_by_column_choice = component.filter_rows_by_column_choice(
response_data.rows, column, column_choice) response_data.rows, column, column_choice)