docs: readme

pull/103/head
Cullen Watson 2024-02-09 13:59:20 -06:00
parent 5d6b1914e8
commit 0f2adb9723
2 changed files with 7 additions and 6 deletions

View File

@ -29,18 +29,20 @@ _Python version >= [3.10](https://www.python.org/downloads/release/python-3100/)
### Usage
```python
import csv
from jobspy import scrape_jobs
jobs = scrape_jobs(
site_name=["indeed", "linkedin", "zip_recruiter", "glassdoor"],
search_term="software engineer",
location="Dallas, TX",
results_wanted=10,
results_wanted=20,
hours_old=72, # (only linkedin is hour specific, others round up to days old)
country_indeed='USA' # only needed for indeed / glassdoor
)
print(f"Found {len(jobs)} jobs")
print(jobs.head())
jobs.to_csv("jobs.csv", index=False) # to_xlsx
jobs.to_csv("jobs.csv", quoting=csv.QUOTE_NONNUMERIC, escapechar="\\", index=False) # or to_excel
```
### Output
@ -73,7 +75,7 @@ Optional
├── linkedin_company_ids (list[int]): searches for linkedin jobs with specific company ids
├── country_indeed (enum): filters the country on Indeed (see below for correct spelling)
├── offset (num): starts the search from an offset (e.g. 25 will start the search from the 25th result)
├── hours_old (int): filters jobs by the number of hours since the job was posted
├── hours_old (int): filters jobs by the number of hours since the job was posted (all sites except LinkedIn round up to the next day)
```
### JobPost Schema

View File

@ -408,8 +408,7 @@ class IndeedScraper(Scraper):
)
return is_remote_in_attributes or is_remote_in_description or is_remote_in_location
@staticmethod
def get_job_details(job_keys: list[str]) -> dict:
def get_job_details(self, job_keys: list[str]) -> dict:
"""
Queries the GraphQL endpoint for detailed job information for the given job keys.
"""
@ -481,7 +480,7 @@ class IndeedScraper(Scraper):
}}
"""
}
response = requests.post(url, headers=headers, json=payload)
response = requests.post(url, headers=headers, json=payload, proxies=self.proxy)
if response.status_code == 200:
return response.json()['data']['jobData']['results']
else: