mirror of
https://github.com/Bunsly/JobSpy.git
synced 2026-03-05 03:54:31 -08:00
Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a37e7f235e | ||
|
|
690739e858 | ||
|
|
43eb2fe0e8 | ||
|
|
e50227bba6 | ||
|
|
45c2d76e15 |
1003
JobSpy_Demo.ipynb
1003
JobSpy_Demo.ipynb
File diff suppressed because one or more lines are too long
23
README.md
23
README.md
@@ -7,6 +7,10 @@
|
||||
- Scrapes job postings from **LinkedIn**, **Indeed** & **ZipRecruiter** simultaneously
|
||||
- Aggregates the job postings in a Pandas DataFrame
|
||||
|
||||
[Video Guide for JobSpy](https://www.youtube.com/watch?v=-yS3mgI5H-4)
|
||||
|
||||
|
||||
|
||||

|
||||
|
||||
### Installation
|
||||
@@ -22,31 +26,30 @@ pip install python-jobspy
|
||||
from jobspy import scrape_jobs
|
||||
import pandas as pd
|
||||
|
||||
jobs: pd.DataFrame = scrape_jobs(
|
||||
result: pd.DataFrame = scrape_jobs(
|
||||
site_name=["indeed", "linkedin", "zip_recruiter"],
|
||||
search_term="software engineer",
|
||||
location="Dallas, TX",
|
||||
results_wanted=10,
|
||||
|
||||
country='USA' # only needed for indeed
|
||||
country_indeed='USA' # only needed for indeed
|
||||
)
|
||||
|
||||
if jobs.empty:
|
||||
print("No jobs found.")
|
||||
else:
|
||||
pd.set_option('display.max_columns', None)
|
||||
pd.set_option('display.max_rows', None)
|
||||
pd.set_option('display.width', None)
|
||||
pd.set_option('display.max_colwidth', 50) # set to 0 to see full job url / desc
|
||||
|
||||
#1 output
|
||||
print(jobs)
|
||||
print(result.jobs)
|
||||
print(result.errors)
|
||||
|
||||
#2 display in Jupyter Notebook
|
||||
#display(jobs)
|
||||
#display(result.jobs)
|
||||
#display(result.errors)
|
||||
|
||||
#3 output to .csv
|
||||
#jobs.to_csv('jobs.csv', index=False)
|
||||
#result.jobs.to_csv('result.jobs.csv', index=False)
|
||||
```
|
||||
|
||||
### Output
|
||||
@@ -71,7 +74,7 @@ Optional
|
||||
├── is_remote (bool)
|
||||
├── results_wanted (int): number of job results to retrieve for each site specified in 'site_name'
|
||||
├── easy_apply (bool): filters for jobs that are hosted on LinkedIn
|
||||
├── country (enum): filters the country on Indeed
|
||||
├── country_indeed (enum): filters the country on Indeed
|
||||
```
|
||||
|
||||
|
||||
@@ -108,7 +111,7 @@ ZipRecruiter searches for jobs in US/Canada & uses only the `location` parameter
|
||||
|
||||
|
||||
### **Indeed**
|
||||
For Indeed, the `country` parameter is required. Additionally, use the `location` parameter and include the city or state if necessary.
|
||||
For Indeed, the `country_indeed` parameter is required. Additionally, use the `location` parameter and include the city or state if necessary.
|
||||
|
||||
You can specify the following countries when searching on Indeed (use the exact name):
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "python-jobspy"
|
||||
version = "1.1.1"
|
||||
version = "1.1.2"
|
||||
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
|
||||
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
|
||||
readme = "README.md"
|
||||
|
||||
@@ -26,7 +26,7 @@ def _map_str_to_site(site_name: str) -> Site:
|
||||
|
||||
|
||||
def scrape_jobs(
|
||||
site_name: str | Site | List[Site],
|
||||
site_name: str | List[str] | Site | List[Site],
|
||||
search_term: str,
|
||||
location: str = "",
|
||||
distance: int = None,
|
||||
@@ -43,11 +43,12 @@ def scrape_jobs(
|
||||
"""
|
||||
|
||||
if type(site_name) == str:
|
||||
site_name = _map_str_to_site(site_name)
|
||||
site_type = [_map_str_to_site(site_name)]
|
||||
else: #: if type(site_name) == list
|
||||
site_type = [_map_str_to_site(site) if type(site) == str else site_name for site in site_name]
|
||||
|
||||
country_enum = Country.from_string(country_indeed)
|
||||
|
||||
site_type = [site_name] if type(site_name) == Site else site_name
|
||||
scraper_input = ScraperInput(
|
||||
site_type=site_type,
|
||||
country=country_enum,
|
||||
@@ -122,7 +123,6 @@ def scrape_jobs(
|
||||
errors_list = [(key, value) for key, value in errors.items()]
|
||||
errors_df = pd.DataFrame(errors_list, columns=["Site", "Error"])
|
||||
|
||||
|
||||
if dfs:
|
||||
df = pd.concat(dfs, ignore_index=True)
|
||||
if hyperlinks:
|
||||
|
||||
@@ -78,9 +78,7 @@ class IndeedScraper(Scraper):
|
||||
raise StatusException(response.status_code)
|
||||
|
||||
soup = BeautifulSoup(response.content, "html.parser")
|
||||
with open("text2.html", "w", encoding="utf-8") as f:
|
||||
f.write(str(soup))
|
||||
if "did not match any jobs" in str(soup):
|
||||
if "did not match any jobs" in response.text:
|
||||
raise ParsingException("Search did not match any jobs")
|
||||
|
||||
jobs = IndeedScraper.parse_jobs(
|
||||
|
||||
10
src/tests/test_all.py
Normal file
10
src/tests/test_all.py
Normal file
@@ -0,0 +1,10 @@
|
||||
from ..jobspy import scrape_jobs
|
||||
|
||||
|
||||
def test_all():
|
||||
result = scrape_jobs(
|
||||
site_name=["linkedin", "indeed", "zip_recruiter"],
|
||||
search_term="software engineer",
|
||||
results_wanted=5,
|
||||
)
|
||||
assert result is not None and result.errors.empty is True
|
||||
@@ -6,4 +6,4 @@ def test_indeed():
|
||||
site_name="indeed",
|
||||
search_term="software engineer",
|
||||
)
|
||||
assert result is not None
|
||||
assert result is not None and result.errors.empty is True
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from jobspy import scrape_jobs
|
||||
from ..jobspy import scrape_jobs
|
||||
|
||||
|
||||
def test_linkedin():
|
||||
@@ -6,4 +6,4 @@ def test_linkedin():
|
||||
site_name="linkedin",
|
||||
search_term="software engineer",
|
||||
)
|
||||
assert result is not None
|
||||
assert result is not None and result.errors.empty is True
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from jobspy import scrape_jobs
|
||||
from ..jobspy import scrape_jobs
|
||||
|
||||
|
||||
def test_ziprecruiter():
|
||||
@@ -7,4 +7,4 @@ def test_ziprecruiter():
|
||||
search_term="software engineer",
|
||||
)
|
||||
|
||||
assert result is not None
|
||||
assert result is not None and result.errors.empty is True
|
||||
|
||||
Reference in New Issue
Block a user