diff --git a/src/jobspy/scrapers/goozali/__init__.py b/src/jobspy/scrapers/goozali/__init__.py index 5b4a44e..117a8db 100644 --- a/src/jobspy/scrapers/goozali/__init__.py +++ b/src/jobspy/scrapers/goozali/__init__.py @@ -7,6 +7,7 @@ This module contains routines to scrape Goozali. from __future__ import annotations + from jobspy.scrapers import Scraper, ScraperInput from jobspy.scrapers.site import Site @@ -42,14 +43,14 @@ class GoozaliScraper(Scraper): self.base_url = "https://airtable.com/v0.3/view/{view_id}/readSharedViewData" def _get_params(self, view_id: str) -> dict[str, str]: - access_policy = get_access_policy(view_id) + access_policy: str = get_access_policy(view_id) params = { "stringifiedObjectParams": stringifiedObjectParams, "request_id": request_id, "accessPolicy": access_policy } - return {k: v for k, v in params.items() if v is not None} + return params def scrape(self, scraper_input: ScraperInput) -> JobResponse: """ diff --git a/src/jobspy/scrapers/goozali/constants.py b/src/jobspy/scrapers/goozali/constants.py index 4fe3af6..88adcf3 100644 --- a/src/jobspy/scrapers/goozali/constants.py +++ b/src/jobspy/scrapers/goozali/constants.py @@ -1,25 +1,32 @@ +import json + + view_ids = ["viwIOzPYaUGxlA0Jd"] headers = { - "accept": "*/*", - "accept-language": "en-US,en;q=0.9,he-IL;q=0.8,he;q=0.7", - "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", - "x-airtable-accept-msgpack": "true", - "x-airtable-application-id": "appwewqLk7iUY4azc", - "x-requested-with": "XMLHttpRequest" + 'accept': '*/*', + 'accept-language': 'en-US,en;q=0.9,he-IL;q=0.8,he;q=0.7', + 'priority': 'u=1, i', + 'sec-ch-ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"', + 'sec-ch-ua-mobile': '?0', + 'sec-ch-ua-platform': '"Windows"', + 'sec-fetch-dest': 'empty', + 'sec-fetch-mode': 'cors', + 'sec-fetch-site': 'same-origin', + 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36', + 'x-airtable-accept-msgpack': 'true', + 'x-airtable-application-id': 'appwewqLk7iUY4azc', + 'x-airtable-inter-service-client': 'webClient', + 'x-airtable-page-load-id': 'pglqAAzFDZEWCEC7s', + 'x-early-prefetch': 'true', + 'x-requested-with': 'XMLHttpRequest', + 'x-time-zone': 'Asia/Jerusalem', + 'x-user-locale': 'en' } session_id = "lWt/xRLIQas/blkys/2YBYl0priNI7gv85sXXtmkrW+TzbLHR8Vm6iY5RDialmLUYsQgLab8uWZyahWRw0HizxdOXhJxd5FB66H85GpUAX8zZbAZPZdUHvzxjaVa130w14QSXDa8OmsNlpKtiUtZ/DXMTOZ1wYDWC4tVJTKJ171wyKA7C9E=" -cookies = { - "__Host-airtable-session": "eyJzZXNzaW9uSWQiOiJzZXNxdFV4bVdKRVRoVGtRMiIsImNzcmZTZWNyZXQiOiIyT0JrVTJkU2I4bDA3NFZIRmd6eTdjTHUifQ==", - "__Host-airtable-session.sig": "heWRrVH73Aa-2ALrH4c_CbvQqTNbNRv9VjPZYv3aHJ4", - "brw": "brwtN7N3OgPFrtfb2", - "brwConsent": "opt-in", - "acq": "eyJhY3F1aXNpdGlvbiI6Ilt7XCJwbGF0Zm9ybVwiOlwiZGVza3RvcFwiLFwib3JpZ2luXCI6XCJsb2dpblwiLFwidG91Y2hUaW1lXCI6XCIyMDI0LTEyLTEyVDE3OjU1OjQyLjU3OVpcIn1dIn0=", - "acq.sig": "5xrqXjip4IJZxIeSPCkajWt_wlBmGw-k7HJCj8wicxU", - "AWSALBTGCORS": "YoIaU+wibkMfutpYUIlGnvYmnUa0VjM2ukwIhESaxfQUNL+PkCcRm5MIXVI5Q+dNJn7rAfdvTlrSF8XXU7wIWQqg8DQn2+OmvFeR5uzreWH5QaRIodTZ5gVQpXK1A62oDSR18fgyIOBRza2wIiet/67JgimPxGpuecdbz2oUwr7UqifGVz0=" -} +cookies = {} request_id = "req4q4tKw3woEEWxw&" share_id = "shrQBuWjXd0YgPqV6" @@ -28,35 +35,25 @@ signature = "be8bd40c133f051f929ebab311c416013f5af0d5acae4264575b88ccf051ee59" def get_access_policy(view_id: str) -> dict[str, str]: - return { + access_policy = { "allowedActions": [ - { - "modelClassName": "view", - "modelIdSelector": view_id, - "action": "readSharedViewData" - }, - { - "modelClassName": "view", - "modelIdSelector": view_id, - "action": "getMetadataForPrinting" - }, - { - "modelClassName": "view", - "modelIdSelector": view_id, - "action": "readSignedAttachmentUrls" - }, - { - "modelClassName": "row", - "modelIdSelector": f"rows *[displayedInView={view_id}]", - "action": "createDocumentPreviewSession" - } + {"modelClassName": "view", "modelIdSelector": view_id, + "action": "readSharedViewData"}, + {"modelClassName": "view", "modelIdSelector": view_id, + "action": "getMetadataForPrinting"}, + {"modelClassName": "view", "modelIdSelector": view_id, + "action": "readSignedAttachmentUrls"}, + {"modelClassName": "row", "modelIdSelector": f"rows *[displayedInView={view_id}]", + "action": "createDocumentPreviewSession"} ], "shareId": share_id, "applicationId": application_id, "generationNumber": 0, - # "expires": "2025-01-02T00:00:00.000Z", # todo:: check how to set it + "expires": "2025-01-02T00:00:00.000Z", "signature": signature } + # Convert to a JSON string + return json.dumps(access_policy) stringifiedObjectParams = {"shouldUseNestedResponseFormat": "true"} diff --git a/src/jobspy/scrapers/goozali/temp_scrapper.py b/src/jobspy/scrapers/goozali/temp_scrapper.py new file mode 100644 index 0000000..7b4c948 --- /dev/null +++ b/src/jobspy/scrapers/goozali/temp_scrapper.py @@ -0,0 +1,36 @@ +import requests + +url = 'https://airtable.com/v0.3/view/viwIOzPYaUGxlA0Jd/readSharedViewData' +params = { + 'stringifiedObjectParams': '{"shouldUseNestedResponseFormat":true}', + 'requestId': 'req4q4tKw3woEEWxw', + 'accessPolicy': '{"allowedActions":[{"modelClassName":"view","modelIdSelector":"viwIOzPYaUGxlA0Jd","action":"readSharedViewData"},{"modelClassName":"view","modelIdSelector":"viwIOzPYaUGxlA0Jd","action":"getMetadataForPrinting"},{"modelClassName":"view","modelIdSelector":"viwIOzPYaUGxlA0Jd","action":"readSignedAttachmentUrls"},{"modelClassName":"row","modelIdSelector":"rows *[displayedInView=viwIOzPYaUGxlA0Jd]","action":"createDocumentPreviewSession"}],"shareId":"shrQBuWjXd0YgPqV6","applicationId":"appwewqLk7iUY4azc","generationNumber":0,"expires":"2025-01-02T00:00:00.000Z","signature":"be8bd40c133f051f929ebab311c416013f5af0d5acae4264575b88ccf051ee59"}' +} + +headers = { + 'accept': '*/*', + 'accept-language': 'en-US,en;q=0.9,he-IL;q=0.8,he;q=0.7', + 'cookie': '__Host-airtable-session=eyJzZXNzaW9uSWQiOiJzZXNxdFV4bVdKRVRoVGtRMiIsImNzcmZTZWNyZXQiOiIyT0JrVTJkU2I4bDA3NFZIRmd6eTdjTHUifQ==; __Host-airtable-session.sig=heWRrVH73Aa-2ALrH4c_CbvQqTNbNRv9VjPZYv3aHJ4; brw=brwtN7N3OgPFrtfb2; acq=eyJhY3F1aXNpdGlvbiI6Ilt7XCJwbGF0Zm9ybVwiOlwiZGVza3RvcFwiLFwib3JpZ2luXCI6XCJsb2dpblwiLFwidG91Y2hUaW1lXCI6XCIyMDI0LTEyLTEyVDE3OjU1OjQyLjU3OVpcIn1dIn0=; acq.sig=5xrqXjip4IJZxIeSPCkajWt_wlBmGw-k7HJCj8wicxU; brwConsent=opt-in; AWSALBTGCORS=YoIaU+wibkMfutpYUIlGnvYmnUa0VjM2ukwIhESaxfQUNL+PkCcRm5MIXVI5Q+dNJn7rAfdvTlrSF8XXU7wIWQqg8DQn2+OmvFeR5uzreWH5QaRIodTZ5gVQpXK1A62oDSR18fgyIOBRza2wIiet/67JgimPxGpuecdbz2oUwr7UqifGVz0=; AWSALBTG=lWt/xRLIQas/blkys/2YBYl0priNI7gv85sXXtmkrW+TzbLHR8Vm6iY5RDialmLUYsQgLab8uWZyahWRw0HizxdOXhJxd5FB66H85GpUAX8zZbAZPZdUHvzxjaVa130w14QSXDa8OmsNlpKtiUtZ/DXMTOZ1wYDWC4tVJTKJ171wyKA7C9E=; AWSALBTGCORS=lWt/xRLIQas/blkys/2YBYl0priNI7gv85sXXtmkrW+TzbLHR8Vm6iY5RDialmLUYsQgLab8uWZyahWRw0HizxdOXhJxd5FB66H85GpUAX8zZbAZPZdUHvzxjaVa130w14QSXDa8OmsNlpKtiUtZ/DXMTOZ1wYDWC4tVJTKJ171wyKA7C9E=; __Host-airtable-session=eyJzZXNzaW9uSWQiOiJzZXNJU0RKYTBPb1I3QlE0WCJ9; __Host-airtable-session.sig=TCCFy2Z5tzMD0iDHuLNL6HzGQAWaTkjpUHH9QZWOIEo; brw=brwtN7N3OgPFrtfb2', + 'priority': 'u=1, i', + 'sec-ch-ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"', + 'sec-ch-ua-mobile': '?0', + 'sec-ch-ua-platform': '"Windows"', + 'sec-fetch-dest': 'empty', + 'sec-fetch-mode': 'cors', + 'sec-fetch-site': 'same-origin', + 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36', + 'x-airtable-accept-msgpack': 'true', + 'x-airtable-application-id': 'appwewqLk7iUY4azc', + 'x-airtable-inter-service-client': 'webClient', + 'x-airtable-page-load-id': 'pglqAAzFDZEWCEC7s', + 'x-early-prefetch': 'true', + 'x-requested-with': 'XMLHttpRequest', + 'x-time-zone': 'Asia/Jerusalem', + 'x-user-locale': 'en' +} + +response = requests.get(url, headers=headers, params=params) + +# Print the response content +print(response.status_code) +print(response.text)