diff --git a/src/jobspy/scrapers/goozali/__init__.py b/src/jobspy/scrapers/goozali/__init__.py index 79b505e..3f73633 100644 --- a/src/jobspy/scrapers/goozali/__init__.py +++ b/src/jobspy/scrapers/goozali/__init__.py @@ -9,6 +9,7 @@ from __future__ import annotations from jobspy.scrapers import Scraper, ScraperInput +from jobspy.scrapers.goozali.model.GoozaliPartRequest import GoozaliPartRequest from jobspy.scrapers.goozali.model.FullRequest import GoozaliFullRequest from jobspy.scrapers.site import Site @@ -55,6 +56,7 @@ class GoozaliScraper(Scraper): seen_ids = set() for view_id in self.view_ids: full_request = GoozaliFullRequest(self.base_url) + part_request = GoozaliPartRequest(self.base_url) try: response = self.session.get( url=full_request.url, diff --git a/src/jobspy/scrapers/goozali/model/FullRequest.py b/src/jobspy/scrapers/goozali/model/FullRequest.py index 02de03d..3387ed8 100644 --- a/src/jobspy/scrapers/goozali/model/FullRequest.py +++ b/src/jobspy/scrapers/goozali/model/FullRequest.py @@ -9,7 +9,6 @@ class GoozaliFullRequest(): self.air_table_page_load_id: str = "pglqAAzFDZEWCEC7s" self.stringifiedObjectParams = { "shouldUseNestedResponseFormat": "true"} - self.session_id: str = "lWt/xRLIQas/blkys/2YBYl0priNI7gv85sXXtmkrW+TzbLHR8Vm6iY5RDialmLUYsQgLab8uWZyahWRw0HizxdOXhJxd5FB66H85GpUAX8zZbAZPZdUHvzxjaVa130w14QSXDa8OmsNlpKtiUtZ/DXMTOZ1wYDWC4tVJTKJ171wyKA7C9E=" self.cookies: dict[str, str] = {} self.request_id: str = "req4q4tKw3woEEWxw&" self.share_id: str = "shrQBuWjXd0YgPqV6" diff --git a/src/jobspy/scrapers/goozali/model/GoozaliPartRequest.py b/src/jobspy/scrapers/goozali/model/GoozaliPartRequest.py new file mode 100644 index 0000000..3e2b5d6 --- /dev/null +++ b/src/jobspy/scrapers/goozali/model/GoozaliPartRequest.py @@ -0,0 +1,74 @@ +import json + + +class GoozaliPartRequest(): + def __init__(self, base_url: str): + self.view_id: str = "viwNRSqqmqZLP0a3C" + self.url = base_url.format(view_id=self.view_id) + self.application_id: str = "app7OQjqEzTtCRq7u" + self.air_table_page_load_id: str = "pglG8mlPvtT0UiBaN" + self.stringifiedObjectParams = { + "shouldUseNestedResponseFormat": "true"} + self.session_id: str = "" + self.cookies: dict[str, str] = {} + self.request_id: str = "requFlC1ueInFAWHe" + self.share_id: str = "shrNtlFxOG2ag1kyB" + self.signature: str = "64689d9701d871b8f3a3fe8ad01de23c06421011eb92a8816399a9e2a869b523" + self.headers = self._generate_headers() + self.params = self._generate_params() + self.cookies = {} + + def _generate_params(self) -> dict[str, str]: + access_policy = self._generate_access_policy() + + return { + "stringifiedObjectParams": self.stringifiedObjectParams, + "request_id": self.request_id, + "accessPolicy": access_policy + } + + def _generate_headers(self) -> str: + return { + 'accept': '*/*', + 'accept-language': 'en-US,en;q=0.9,he-IL;q=0.8,he;q=0.7', + 'priority': 'u=1, i', + 'sec-ch-ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"', + 'sec-ch-ua-mobile': '?0', + 'sec-ch-ua-platform': '"Windows"', + 'sec-fetch-dest': 'empty', + 'sec-fetch-mode': 'cors', + 'sec-fetch-site': 'same-origin', + 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36', + 'x-airtable-accept-msgpack': 'true', + 'x-airtable-application-id': self.application_id, + 'x-airtable-inter-service-client': 'webClient', + 'x-airtable-page-load-id': self.air_table_page_load_id, + 'x-early-prefetch': 'true', + 'x-requested-with': 'XMLHttpRequest', + 'x-time-zone': 'Asia/Jerusalem', + 'x-user-locale': 'en' + } + + def _generate_access_policy(self) -> str: + """ + Generates a JSON string for access policy. + """ + access_policy = { + "allowedActions": [ + {"modelClassName": "view", "modelIdSelector": self.view_id, + "action": "readSharedViewData"}, + {"modelClassName": "view", "modelIdSelector": self.view_id, + "action": "getMetadataForPrinting"}, + {"modelClassName": "view", "modelIdSelector": self.view_id, + "action": "readSignedAttachmentUrls"}, + {"modelClassName": "row", "modelIdSelector": f"rows *[displayedInView={self.view_id}]", + "action": "createDocumentPreviewSession"} + ], + "shareId": self.share_id, + "applicationId": self.application_id, + "generationNumber": 0, + "expires": "2025-01-02T00:00:00.000Z", + "signature": self.signature + } + # Convert to a JSON string + return json.dumps(access_policy)