docs: readme

2024-02-09 13:59:20 -06:00 · 2024-02-09 13:59:20 -06:00 · 0f2adb9723
parent 5d6b1914e8
commit 0f2adb9723
2 changed files with 7 additions and 6 deletions
--- a/README.md
+++ b/README.md
@ -29,18 +29,20 @@ _Python version >= [3.10](https://www.python.org/downloads/release/python-3100/)
 ### Usage

 ```python
+import csv
 from jobspy import scrape_jobs

 jobs = scrape_jobs(
    site_name=["indeed", "linkedin", "zip_recruiter", "glassdoor"],
    search_term="software engineer",
    location="Dallas, TX",
-    results_wanted=10,
+    results_wanted=20,
+    hours_old=72, # (only LinkedIn is hour-specific; other sites round up to whole days)
    country_indeed='USA'  # only needed for indeed / glassdoor
 )
 print(f"Found {len(jobs)} jobs")
 print(jobs.head())
-jobs.to_csv("jobs.csv", index=False) # to_xlsx
+jobs.to_csv("jobs.csv", quoting=csv.QUOTE_NONNUMERIC, escapechar="\\", index=False) # to_xlsx
 ```

 ### Output
@ -73,7 +75,7 @@ Optional
 ├── linkedin_company_ids (list[int): searches for linkedin jobs with specific company ids
 ├── country_indeed (enum): filters the country on Indeed (see below for correct spelling)
 ├── offset (num): starts the search from an offset (e.g. 25 will start the search from the 25th result)
-├── hours_old (int): filters jobs by the number of hours since the job was posted
+├── hours_old (int): filters jobs by the number of hours since the job was posted (all sites except LinkedIn round up to the next day)
 ```

 ### JobPost Schema
--- a/src/jobspy/scrapers/indeed/init.py
+++ b/src/jobspy/scrapers/indeed/init.py
@ -408,8 +408,7 @@ class IndeedScraper(Scraper):
        )
        return is_remote_in_attributes or is_remote_in_description or is_remote_in_location

-    @staticmethod
-    def get_job_details(job_keys: list[str]) -> dict:
+    def get_job_details(self, job_keys: list[str]) -> dict:
        """
        Queries the GraphQL endpoint for detailed job information for the given job keys.
        """
@ -481,7 +480,7 @@ class IndeedScraper(Scraper):
            }}
            """
        }
-        response = requests.post(url, headers=headers, json=payload)
+        response = requests.post(url, headers=headers, json=payload, proxies=self.proxy)
        if response.status_code == 200:
            return response.json()['data']['jobData']['results']
        else: