docs: readme

pull/103/head
Cullen Watson 2024-02-09 13:59:20 -06:00
parent 5d6b1914e8
commit 0f2adb9723
2 changed files with 7 additions and 6 deletions

View File

@ -29,18 +29,20 @@ _Python version >= [3.10](https://www.python.org/downloads/release/python-3100/)
### Usage ### Usage
```python ```python
import csv
from jobspy import scrape_jobs from jobspy import scrape_jobs
jobs = scrape_jobs( jobs = scrape_jobs(
site_name=["indeed", "linkedin", "zip_recruiter", "glassdoor"], site_name=["indeed", "linkedin", "zip_recruiter", "glassdoor"],
search_term="software engineer", search_term="software engineer",
location="Dallas, TX", location="Dallas, TX",
results_wanted=10, results_wanted=20,
hours_old=72, # (only linkedin is hour specific, others round up to days old)
country_indeed='USA' # only needed for indeed / glassdoor country_indeed='USA' # only needed for indeed / glassdoor
) )
print(f"Found {len(jobs)} jobs") print(f"Found {len(jobs)} jobs")
print(jobs.head()) print(jobs.head())
jobs.to_csv("jobs.csv", index=False) # to_xlsx jobs.to_csv("jobs.csv", quoting=csv.QUOTE_NONNUMERIC, escapechar="\\", index=False) # to_xlsx
``` ```
### Output ### Output
@ -73,7 +75,7 @@ Optional
├── linkedin_company_ids (list[int]): searches for linkedin jobs with specific company ids ├── linkedin_company_ids (list[int]): searches for linkedin jobs with specific company ids
├── country_indeed (enum): filters the country on Indeed (see below for correct spelling) ├── country_indeed (enum): filters the country on Indeed (see below for correct spelling)
├── offset (num): starts the search from an offset (e.g. 25 will start the search from the 25th result) ├── offset (num): starts the search from an offset (e.g. 25 will start the search from the 25th result)
├── hours_old (int): filters jobs by the number of hours since the job was posted ├── hours_old (int): filters jobs by the number of hours since the job was posted (all sites except LinkedIn round up to the next day)
``` ```
### JobPost Schema ### JobPost Schema

View File

@ -408,8 +408,7 @@ class IndeedScraper(Scraper):
) )
return is_remote_in_attributes or is_remote_in_description or is_remote_in_location return is_remote_in_attributes or is_remote_in_description or is_remote_in_location
@staticmethod def get_job_details(self, job_keys: list[str]) -> dict:
def get_job_details(job_keys: list[str]) -> dict:
""" """
Queries the GraphQL endpoint for detailed job information for the given job keys. Queries the GraphQL endpoint for detailed job information for the given job keys.
""" """
@ -481,7 +480,7 @@ class IndeedScraper(Scraper):
}} }}
""" """
} }
response = requests.post(url, headers=headers, json=payload) response = requests.post(url, headers=headers, json=payload, proxies=self.proxy)
if response.status_code == 200: if response.status_code == 200:
return response.json()['data']['jobData']['results'] return response.json()['data']['jobData']['results']
else: else: