mirror of https://github.com/Bunsly/JobSpy
parent
ead8eb126d
commit
f2d5bb6cfa
|
@ -185,6 +185,7 @@ class Location(BaseModel):
|
|||
country: Country | str | None = None
|
||||
city: Optional[str] = None
|
||||
state: Optional[str] = None
|
||||
text: str = None
|
||||
|
||||
def display_location(self) -> str:
|
||||
location_parts = []
|
||||
|
@ -253,6 +254,12 @@ class DescriptionFormat(Enum):
|
|||
|
||||
|
||||
class JobPost(BaseModel):
|
||||
|
||||
# def __init__(self, obj):
|
||||
# super().__init__()
|
||||
# for key, value in obj.items():
|
||||
# setattr(self, key, value)
|
||||
|
||||
id: str | None = None
|
||||
title: str
|
||||
company_name: str | None
|
||||
|
@ -271,6 +278,7 @@ class JobPost(BaseModel):
|
|||
emails: list[str] | None = None
|
||||
is_remote: bool | None = None
|
||||
listing_type: str | None = None
|
||||
field: str | None = None
|
||||
|
||||
# linkedin specific
|
||||
job_level: str | None = None
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
from datetime import datetime
|
||||
import json
|
||||
|
||||
from jobspy.jobs import JobPost
|
||||
from jobspy.jobs import JobPost, Location
|
||||
from jobspy.scrapers.goozali.model import GoozaliColumnTypeOptions, GoozaliResponse, GoozaliRow
|
||||
from jobspy.scrapers.goozali.model.GoozaliColumn import GoozaliColumn
|
||||
from jobspy.scrapers.goozali.model.GoozaliColumnChoice import GoozaliColumnChoice
|
||||
from jobspy.scrapers.goozali.model.GozaaliResponseData import GoozaliResponseData
|
||||
from .constants import job_post_column_to_goozali_column, job_post_column_names
|
||||
|
||||
# Mapping function to convert parsed dictionary into GoozaliResponseData
|
||||
|
||||
|
@ -77,6 +79,24 @@ class GoozaliMapper:
|
|||
# Return a new GoozaliResponse with msg and the converted data
|
||||
return GoozaliResponse(msg=data['msg'], data=data_obj)
|
||||
|
||||
def map_goozali_response_to_job_post(self, row: GoozaliRow, columns: dict[str, GoozaliColumn]) -> JobPost:
|
||||
def get_value_by_job_post_Id(self, job_post_column: str, row: GoozaliRow, dict_column_name_to_columnZ):
    """Return the value of one JobPost field, read from a Goozali row.

    Args:
        job_post_column: JobPost field name; must be a key of
            ``job_post_column_to_goozali_column``.
        row: Goozali row whose ``cellValuesByColumnId`` is indexed by
            column id.
        dict_column_name_to_columnZ: Mapping from Goozali column name to
            the column object (only its ``id`` attribute is read here).

    Returns:
        A ``Location`` for ``"location"``, a ``datetime.date`` for
        ``"date_posted"``, otherwise the cell value converted with ``str``.

    Raises:
        KeyError: If the field has no Goozali mapping or the column name is
            unknown. The cell lookup itself may also raise when the row has
            no entry for the column.
    """
    goozali_column_name = job_post_column_to_goozali_column[job_post_column]
    column = dict_column_name_to_columnZ[goozali_column_name]
    # The cell is looked up before any special-casing, so a missing cell
    # fails for every field, including "location".
    value = row.cellValuesByColumnId[column.id]

    if job_post_column == "location":
        # TODO: fix it - the location is hard-coded and the cell value is
        # ignored.
        return Location(text="tel aviv")

    if job_post_column == "date_posted":
        # The "Z" suffix is stripped because datetime.fromisoformat() only
        # accepts it from Python 3.11 on; only the date part is kept anyway.
        return datetime.fromisoformat(value.replace("Z", "")).date()

    return str(value)
|
||||
|
||||
def map_goozali_response_to_job_post(self, row: GoozaliRow, dict_column_name_to_column) -> JobPost:
    """Build a validated ``JobPost`` from a single Goozali row.

    Every name in ``job_post_column_names`` is resolved through
    ``get_value_by_job_post_Id`` and the collected values are handed to
    ``JobPost.model_validate``.

    Args:
        row: The Goozali row to convert.
        dict_column_name_to_column: Mapping from Goozali column name to the
            column object, passed through to ``get_value_by_job_post_Id``.

    Returns:
        The ``JobPost`` produced by ``JobPost.model_validate``.
    """
    job_post_fields = {
        col: self.get_value_by_job_post_Id(col, row, dict_column_name_to_column)
        for col in job_post_column_names
    }
    return JobPost.model_validate(job_post_fields)
|
||||
|
|
|
@ -18,7 +18,6 @@ from jobspy.scrapers.goozali.model.GoozaliColumnChoice import GoozaliColumnChoic
|
|||
from jobspy.scrapers.site import Site
|
||||
|
||||
from ..utils import create_session, create_logger
|
||||
from .constants import headers
|
||||
from ...jobs import (
|
||||
JobPost,
|
||||
JobResponse,
|
||||
|
|
|
@ -1,59 +1,21 @@
|
|||
import json
|
||||
|
||||
|
||||
# Ids of the views that are queried.
view_ids = ["viwIOzPYaUGxlA0Jd"]

# HTTP request headers (browser-like values plus x-airtable-* fields).
headers = {
    'accept': '*/*',
    'accept-language': 'en-US,en;q=0.9,he-IL;q=0.8,he;q=0.7',
    'priority': 'u=1, i',
    'sec-ch-ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    'x-airtable-accept-msgpack': 'true',
    'x-airtable-application-id': 'appwewqLk7iUY4azc',
    'x-airtable-inter-service-client': 'webClient',
    'x-airtable-page-load-id': 'pglqAAzFDZEWCEC7s',
    'x-early-prefetch': 'true',
    'x-requested-with': 'XMLHttpRequest',
    'x-time-zone': 'Asia/Jerusalem',
    'x-user-locale': 'en',
}

# JobPost field name -> Goozali column name used to look the value up.
job_post_column_to_goozali_column = {
    "date_posted": "Discovered",
    "field": "Field",
    "title": "Job Title",
    "job_url": "Position Link",
    "company_name": "Company",
    "description": "Requirements",
    "location": "Location",
    "company_industry": "Company Industry",
    "id": "Job ID",
}

# NOTE(review): the values below look like captured session/signature data
# for a shared view - confirm whether they should live in configuration
# rather than in source.
session_id = "lWt/xRLIQas/blkys/2YBYl0priNI7gv85sXXtmkrW+TzbLHR8Vm6iY5RDialmLUYsQgLab8uWZyahWRw0HizxdOXhJxd5FB66H85GpUAX8zZbAZPZdUHvzxjaVa130w14QSXDa8OmsNlpKtiUtZ/DXMTOZ1wYDWC4tVJTKJ171wyKA7C9E="

cookies = {}

# NOTE(review): the trailing "&" is kept exactly as found - verify it is
# intentional.
request_id = "req4q4tKw3woEEWxw&"
share_id = "shrQBuWjXd0YgPqV6"
application_id = "appwewqLk7iUY4azc"
signature = "be8bd40c133f051f929ebab311c416013f5af0d5acae4264575b88ccf051ee59"
|
||||
|
||||
|
||||
def get_access_policy(view_id: str) -> str:
    """Return the access policy for ``view_id`` serialized as a JSON string.

    The policy lists four allowed actions scoped to the given view and
    carries the module-level ``share_id``, ``application_id`` and
    ``signature`` values.

    Args:
        view_id: Id of the view the allowed actions refer to.

    Returns:
        The policy dictionary encoded with ``json.dumps`` (a ``str``, not a
        ``dict``).
    """
    access_policy = {
        "allowedActions": [
            {"modelClassName": "view", "modelIdSelector": view_id,
             "action": "readSharedViewData"},
            {"modelClassName": "view", "modelIdSelector": view_id,
             "action": "getMetadataForPrinting"},
            {"modelClassName": "view", "modelIdSelector": view_id,
             "action": "readSignedAttachmentUrls"},
            {"modelClassName": "row", "modelIdSelector": f"rows *[displayedInView={view_id}]",
             "action": "createDocumentPreviewSession"}
        ],
        "shareId": share_id,
        "applicationId": application_id,
        "generationNumber": 0,
        # NOTE(review): fixed expiry date; presumably tied to the
        # hard-coded signature - confirm before changing either value.
        "expires": "2025-01-02T00:00:00.000Z",
        "signature": signature
    }
    # Convert to a JSON string
    return json.dumps(access_policy)
|
||||
|
||||
|
||||
# Request parameters object; the flag is a string, not a bool.
stringifiedObjectParams = {"shouldUseNestedResponseFormat": "true"}

# JobPost field names, in this order.
job_post_column_names = [
    "id",
    "date_posted",
    "field",
    "title",
    "job_url",
    "company_name",
    "description",
    "location",
    "company_industry",
]
|
||||
|
|
|
@ -3,6 +3,7 @@ import os
|
|||
from jobspy import scrape_jobs
|
||||
import pandas as pd
|
||||
|
||||
from jobspy.jobs import JobPost
|
||||
from jobspy.scrapers.goozali.GoozaliMapper import GoozaliMapper
|
||||
from jobspy.scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent
|
||||
from jobspy.scrapers.goozali.model import GoozaliColumn
|
||||
|
@ -35,13 +36,18 @@ try:
|
|||
filtered_rows_by_age_and_column_choice = component.filter_rows_by_hours(
|
||||
filtered_rows_by_column_choice, hours_old)
|
||||
|
||||
# Key mapper: Extract 'id' as the key
|
||||
def extract_goozali_column_id(column): return column.id if isinstance(
|
||||
# Key mapper: Extract 'name' as the key
|
||||
def extract_goozali_column_name(column): return column.name if isinstance(
|
||||
column, GoozaliColumn) else None
|
||||
dict_column_id_to_column = create_dict_by_key_and_value(
|
||||
response_data.columns, extract_goozali_column_id)
|
||||
dict_column_name_to_column = create_dict_by_key_and_value(
|
||||
response_data.columns, extract_goozali_column_name)
|
||||
response: list[JobPost] = []
|
||||
for row in filtered_rows_by_age_and_column_choice:
|
||||
job_post = mapper.map_goozali_response_to_job_post(
|
||||
row, dict_column_name_to_column)
|
||||
response.append(job_post)
|
||||
|
||||
print("hello heloo")
|
||||
print("kingggggg")
|
||||
except FileNotFoundError:
|
||||
print("The file was not found.")
|
||||
except json.JSONDecodeError:
|
||||
|
|
Loading…
Reference in New Issue