mirror of https://github.com/Bunsly/JobSpy
docs: readme
parent
5d6b1914e8
commit
0f2adb9723
|
@ -29,18 +29,20 @@ _Python version >= [3.10](https://www.python.org/downloads/release/python-3100/)
|
||||||
### Usage
|
### Usage
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
import csv
|
||||||
from jobspy import scrape_jobs
|
from jobspy import scrape_jobs
|
||||||
|
|
||||||
jobs = scrape_jobs(
|
jobs = scrape_jobs(
|
||||||
site_name=["indeed", "linkedin", "zip_recruiter", "glassdoor"],
|
site_name=["indeed", "linkedin", "zip_recruiter", "glassdoor"],
|
||||||
search_term="software engineer",
|
search_term="software engineer",
|
||||||
location="Dallas, TX",
|
location="Dallas, TX",
|
||||||
results_wanted=10,
|
results_wanted=20,
|
||||||
|
hours_old=72, # (only linkedin is hour specific, others round up to days old)
|
||||||
country_indeed='USA' # only needed for indeed / glassdoor
|
country_indeed='USA' # only needed for indeed / glassdoor
|
||||||
)
|
)
|
||||||
print(f"Found {len(jobs)} jobs")
|
print(f"Found {len(jobs)} jobs")
|
||||||
print(jobs.head())
|
print(jobs.head())
|
||||||
jobs.to_csv("jobs.csv", index=False) # to_xlsx
|
jobs.to_csv("jobs.csv", quoting=csv.QUOTE_NONNUMERIC, escapechar="\\", index=False) # to_xlsx
|
||||||
```
|
```
|
||||||
|
|
||||||
### Output
|
### Output
|
||||||
|
@ -73,7 +75,7 @@ Optional
|
||||||
├── linkedin_company_ids (list[int]): searches for linkedin jobs with specific company ids
|
├── linkedin_company_ids (list[int]): searches for linkedin jobs with specific company ids
|
||||||
├── country_indeed (enum): filters the country on Indeed (see below for correct spelling)
|
├── country_indeed (enum): filters the country on Indeed (see below for correct spelling)
|
||||||
├── offset (num): starts the search from an offset (e.g. 25 will start the search from the 25th result)
|
├── offset (num): starts the search from an offset (e.g. 25 will start the search from the 25th result)
|
||||||
├── hours_old (int): filters jobs by the number of hours since the job was posted
|
├── hours_old (int): filters jobs by the number of hours since the job was posted (all but LinkedIn round up to the next day)
|
||||||
```
|
```
|
||||||
|
|
||||||
### JobPost Schema
|
### JobPost Schema
|
||||||
|
|
|
@ -408,8 +408,7 @@ class IndeedScraper(Scraper):
|
||||||
)
|
)
|
||||||
return is_remote_in_attributes or is_remote_in_description or is_remote_in_location
|
return is_remote_in_attributes or is_remote_in_description or is_remote_in_location
|
||||||
|
|
||||||
@staticmethod
|
def get_job_details(self, job_keys: list[str]) -> dict:
|
||||||
def get_job_details(job_keys: list[str]) -> dict:
|
|
||||||
"""
|
"""
|
||||||
Queries the GraphQL endpoint for detailed job information for the given job keys.
|
Queries the GraphQL endpoint for detailed job information for the given job keys.
|
||||||
"""
|
"""
|
||||||
|
@ -481,7 +480,7 @@ class IndeedScraper(Scraper):
|
||||||
}}
|
}}
|
||||||
"""
|
"""
|
||||||
}
|
}
|
||||||
response = requests.post(url, headers=headers, json=payload)
|
response = requests.post(url, headers=headers, json=payload, proxies=self.proxy)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
return response.json()['data']['jobData']['results']
|
return response.json()['data']['jobData']['results']
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Reference in New Issue