mirror of https://github.com/Bunsly/JobSpy
parent
ead8eb126d
commit
f2d5bb6cfa
|
@ -185,6 +185,7 @@ class Location(BaseModel):
|
||||||
country: Country | str | None = None
|
country: Country | str | None = None
|
||||||
city: Optional[str] = None
|
city: Optional[str] = None
|
||||||
state: Optional[str] = None
|
state: Optional[str] = None
|
||||||
|
text: str = None
|
||||||
|
|
||||||
def display_location(self) -> str:
|
def display_location(self) -> str:
|
||||||
location_parts = []
|
location_parts = []
|
||||||
|
@ -253,6 +254,12 @@ class DescriptionFormat(Enum):
|
||||||
|
|
||||||
|
|
||||||
class JobPost(BaseModel):
|
class JobPost(BaseModel):
|
||||||
|
|
||||||
|
# def __init__(self, obj):
|
||||||
|
# super().__init__()
|
||||||
|
# for key, value in obj.items():
|
||||||
|
# setattr(self, key, value)
|
||||||
|
|
||||||
id: str | None = None
|
id: str | None = None
|
||||||
title: str
|
title: str
|
||||||
company_name: str | None
|
company_name: str | None
|
||||||
|
@ -271,6 +278,7 @@ class JobPost(BaseModel):
|
||||||
emails: list[str] | None = None
|
emails: list[str] | None = None
|
||||||
is_remote: bool | None = None
|
is_remote: bool | None = None
|
||||||
listing_type: str | None = None
|
listing_type: str | None = None
|
||||||
|
field: str | None = None
|
||||||
|
|
||||||
# linkedin specific
|
# linkedin specific
|
||||||
job_level: str | None = None
|
job_level: str | None = None
|
||||||
|
|
|
@ -1,10 +1,12 @@
|
||||||
|
from datetime import datetime
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from jobspy.jobs import JobPost
|
from jobspy.jobs import JobPost, Location
|
||||||
from jobspy.scrapers.goozali.model import GoozaliColumnTypeOptions, GoozaliResponse, GoozaliRow
|
from jobspy.scrapers.goozali.model import GoozaliColumnTypeOptions, GoozaliResponse, GoozaliRow
|
||||||
from jobspy.scrapers.goozali.model.GoozaliColumn import GoozaliColumn
|
from jobspy.scrapers.goozali.model.GoozaliColumn import GoozaliColumn
|
||||||
from jobspy.scrapers.goozali.model.GoozaliColumnChoice import GoozaliColumnChoice
|
from jobspy.scrapers.goozali.model.GoozaliColumnChoice import GoozaliColumnChoice
|
||||||
from jobspy.scrapers.goozali.model.GozaaliResponseData import GoozaliResponseData
|
from jobspy.scrapers.goozali.model.GozaaliResponseData import GoozaliResponseData
|
||||||
|
from .constants import job_post_column_to_goozali_column, job_post_column_names
|
||||||
|
|
||||||
# Mapping function to convert parsed dictionary into GoozaliResponseData
|
# Mapping function to convert parsed dictionary into GoozaliResponseData
|
||||||
|
|
||||||
|
@ -77,6 +79,24 @@ class GoozaliMapper:
|
||||||
# Return a new GoozaliResponse with msg and the converted data
|
# Return a new GoozaliResponse with msg and the converted data
|
||||||
return GoozaliResponse(msg=data['msg'], data=data_obj)
|
return GoozaliResponse(msg=data['msg'], data=data_obj)
|
||||||
|
|
||||||
def map_goozali_response_to_job_post(self, row: GoozaliRow, columns: dict[str, GoozaliColumn]) -> JobPost:
|
def get_value_by_job_post_Id(self, job_post_column: str, row: GoozaliRow, dict_column_name_to_columnZ):
    """Return the value of one JobPost field extracted from a Goozali row.

    The JobPost field name is translated to a Goozali column name through
    ``job_post_column_to_goozali_column``; that column is looked up in
    ``dict_column_name_to_columnZ`` and its ``id`` is used to index
    ``row.cellValuesByColumnId``.

    Args:
        job_post_column: JobPost field name, e.g. ``"title"``.
        row: Goozali row whose ``cellValuesByColumnId`` is indexed by
            column id.
        dict_column_name_to_columnZ: Lookup from Goozali column name to the
            column object (only its ``id`` attribute is read here).

    Returns:
        A ``Location`` for ``"location"``, a ``datetime.date`` for
        ``"date_posted"``, the cell value converted to ``str`` for every
        other field, or ``None`` when the row holds no value for the column.

    Raises:
        KeyError: If ``job_post_column`` has no Goozali mapping, or the
            mapped column name is missing from
            ``dict_column_name_to_columnZ``.
    """
    goozali_column_name = job_post_column_to_goozali_column[job_post_column]
    column = dict_column_name_to_columnZ[goozali_column_name]

    if job_post_column == "location":
        # TODO: derive the location from the row's cell value. Until then a
        # fixed placeholder is returned, so the cell is deliberately not read
        # and a row without a location cell cannot fail here.
        return Location(text="tel aviv")

    try:
        value = row.cellValuesByColumnId[column.id]
    except KeyError:
        # The row has no cell for this column: report "no value" instead of
        # aborting the mapping of the whole row.
        return None
    if value is None:
        # Avoid producing the literal string "None" (or crashing on
        # ``.replace`` in the date branch below).
        return None

    if job_post_column == "date_posted":
        # NOTE(review): assumes an ISO-8601 timestamp string with an optional
        # trailing "Z" - confirm against the Goozali payload.
        return datetime.fromisoformat(value.replace("Z", "")).date()

    return str(value)
|
||||||
|
|
||||||
|
def map_goozali_response_to_job_post(self, row: GoozaliRow, dict_column_name_to_column) -> JobPost:
    """Build a ``JobPost`` from a single Goozali row.

    Each name in ``job_post_column_names`` is resolved through
    ``get_value_by_job_post_Id`` and the collected values are handed to
    ``JobPost.model_validate``.

    Args:
        row: Goozali row to convert.
        dict_column_name_to_column: Lookup from Goozali column name to the
            column object, forwarded unchanged to
            ``get_value_by_job_post_Id``.

    Returns:
        The ``JobPost`` produced by ``JobPost.model_validate``.
    """
    field_values = {
        field_name: self.get_value_by_job_post_Id(
            field_name, row, dict_column_name_to_column)
        for field_name in job_post_column_names
    }
    return JobPost.model_validate(field_values)
|
||||||
|
|
|
@ -18,7 +18,6 @@ from jobspy.scrapers.goozali.model.GoozaliColumnChoice import GoozaliColumnChoic
|
||||||
from jobspy.scrapers.site import Site
|
from jobspy.scrapers.site import Site
|
||||||
|
|
||||||
from ..utils import create_session, create_logger
|
from ..utils import create_session, create_logger
|
||||||
from .constants import headers
|
|
||||||
from ...jobs import (
|
from ...jobs import (
|
||||||
JobPost,
|
JobPost,
|
||||||
JobResponse,
|
JobResponse,
|
||||||
|
|
|
@ -1,59 +1,21 @@
|
||||||
import json
|
job_post_column_to_goozali_column = {
|
||||||
|
"date_posted": "Discovered",
|
||||||
|
"field": "Field",
|
||||||
view_ids = ["viwIOzPYaUGxlA0Jd"]
|
"title": "Job Title",
|
||||||
|
"job_url": "Position Link",
|
||||||
headers = {
|
"company_name": "Company",
|
||||||
'accept': '*/*',
|
"description": "Requirements",
|
||||||
'accept-language': 'en-US,en;q=0.9,he-IL;q=0.8,he;q=0.7',
|
"location": "Location",
|
||||||
'priority': 'u=1, i',
|
"company_industry": "Company Industry",
|
||||||
'sec-ch-ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
|
"id": "Job ID"
|
||||||
'sec-ch-ua-mobile': '?0',
|
|
||||||
'sec-ch-ua-platform': '"Windows"',
|
|
||||||
'sec-fetch-dest': 'empty',
|
|
||||||
'sec-fetch-mode': 'cors',
|
|
||||||
'sec-fetch-site': 'same-origin',
|
|
||||||
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
|
|
||||||
'x-airtable-accept-msgpack': 'true',
|
|
||||||
'x-airtable-application-id': 'appwewqLk7iUY4azc',
|
|
||||||
'x-airtable-inter-service-client': 'webClient',
|
|
||||||
'x-airtable-page-load-id': 'pglqAAzFDZEWCEC7s',
|
|
||||||
'x-early-prefetch': 'true',
|
|
||||||
'x-requested-with': 'XMLHttpRequest',
|
|
||||||
'x-time-zone': 'Asia/Jerusalem',
|
|
||||||
'x-user-locale': 'en'
|
|
||||||
}
|
}
|
||||||
|
|
||||||
session_id = "lWt/xRLIQas/blkys/2YBYl0priNI7gv85sXXtmkrW+TzbLHR8Vm6iY5RDialmLUYsQgLab8uWZyahWRw0HizxdOXhJxd5FB66H85GpUAX8zZbAZPZdUHvzxjaVa130w14QSXDa8OmsNlpKtiUtZ/DXMTOZ1wYDWC4tVJTKJ171wyKA7C9E="
|
job_post_column_names = ["id",
|
||||||
|
"date_posted",
|
||||||
cookies = {}
|
"field",
|
||||||
|
"title",
|
||||||
request_id = "req4q4tKw3woEEWxw&"
|
"job_url",
|
||||||
share_id = "shrQBuWjXd0YgPqV6"
|
"company_name",
|
||||||
application_id = "appwewqLk7iUY4azc"
|
"description",
|
||||||
signature = "be8bd40c133f051f929ebab311c416013f5af0d5acae4264575b88ccf051ee59"
|
"location",
|
||||||
|
"company_industry"]
|
||||||
|
|
||||||
def get_access_policy(view_id: str) -> str:
    """Return the access policy for ``view_id`` serialized as a JSON string.

    The policy lists three view-level actions plus one row-level action for
    the given view, followed by the module-level ``share_id``,
    ``application_id`` and ``signature`` values.

    Args:
        view_id: Identifier of the view the policy applies to.

    Returns:
        The policy encoded with ``json.dumps`` (a ``str``, not a ``dict``).
    """
    # The view-level entries differ only in their action name.
    view_actions = (
        "readSharedViewData",
        "getMetadataForPrinting",
        "readSignedAttachmentUrls",
    )
    allowed_actions = [
        {"modelClassName": "view", "modelIdSelector": view_id, "action": action}
        for action in view_actions
    ]
    allowed_actions.append({
        "modelClassName": "row",
        "modelIdSelector": f"rows *[displayedInView={view_id}]",
        "action": "createDocumentPreviewSession",
    })

    access_policy = {
        "allowedActions": allowed_actions,
        "shareId": share_id,
        "applicationId": application_id,
        "generationNumber": 0,
        # NOTE(review): fixed expiry timestamp; presumably tied to the
        # ``signature`` value - confirm before changing either one.
        "expires": "2025-01-02T00:00:00.000Z",
        "signature": signature,
    }
    return json.dumps(access_policy)
|
|
||||||
|
|
||||||
|
|
||||||
stringifiedObjectParams = {"shouldUseNestedResponseFormat": "true"}
|
|
||||||
|
|
|
@ -3,6 +3,7 @@ import os
|
||||||
from jobspy import scrape_jobs
|
from jobspy import scrape_jobs
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
from jobspy.jobs import JobPost
|
||||||
from jobspy.scrapers.goozali.GoozaliMapper import GoozaliMapper
|
from jobspy.scrapers.goozali.GoozaliMapper import GoozaliMapper
|
||||||
from jobspy.scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent
|
from jobspy.scrapers.goozali.GoozaliScrapperComponent import GoozaliScrapperComponent
|
||||||
from jobspy.scrapers.goozali.model import GoozaliColumn
|
from jobspy.scrapers.goozali.model import GoozaliColumn
|
||||||
|
@ -35,13 +36,18 @@ try:
|
||||||
filtered_rows_by_age_and_column_choice = component.filter_rows_by_hours(
|
filtered_rows_by_age_and_column_choice = component.filter_rows_by_hours(
|
||||||
filtered_rows_by_column_choice, hours_old)
|
filtered_rows_by_column_choice, hours_old)
|
||||||
|
|
||||||
# Key mapper: Extract 'id' as the key
|
# Key mapper: Extract 'name' as the key
|
||||||
def extract_goozali_column_id(column): return column.id if isinstance(
|
def extract_goozali_column_name(column): return column.name if isinstance(
|
||||||
column, GoozaliColumn) else None
|
column, GoozaliColumn) else None
|
||||||
dict_column_id_to_column = create_dict_by_key_and_value(
|
dict_column_name_to_column = create_dict_by_key_and_value(
|
||||||
response_data.columns, extract_goozali_column_id)
|
response_data.columns, extract_goozali_column_name)
|
||||||
|
response: list[JobPost] = []
|
||||||
|
for row in filtered_rows_by_age_and_column_choice:
|
||||||
|
job_post = mapper.map_goozali_response_to_job_post(
|
||||||
|
row, dict_column_name_to_column)
|
||||||
|
response.append(job_post)
|
||||||
|
|
||||||
print("hello heloo")
|
print("kingggggg")
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
print("The file was not found.")
|
print("The file was not found.")
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
|
|
Loading…
Reference in New Issue