mirror of https://github.com/Bunsly/JobSpy
- refactor & #41 bug fix
parent
43eb2fe0e8
commit
690739e858
|
@ -1,6 +1,6 @@
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "python-jobspy"
|
name = "python-jobspy"
|
||||||
version = "1.1.1"
|
version = "1.1.2"
|
||||||
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
|
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
|
||||||
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
|
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
|
|
|
@ -26,7 +26,7 @@ def _map_str_to_site(site_name: str) -> Site:
|
||||||
|
|
||||||
|
|
||||||
def scrape_jobs(
|
def scrape_jobs(
|
||||||
site_name: str | Site | List[Site],
|
site_name: str | List[str] | Site | List[Site],
|
||||||
search_term: str,
|
search_term: str,
|
||||||
location: str = "",
|
location: str = "",
|
||||||
distance: int = None,
|
distance: int = None,
|
||||||
|
@ -43,11 +43,12 @@ def scrape_jobs(
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if type(site_name) == str:
|
if type(site_name) == str:
|
||||||
site_name = _map_str_to_site(site_name)
|
site_type = [_map_str_to_site(site_name)]
|
||||||
|
else: #: if type(site_name) == list
|
||||||
|
site_type = [_map_str_to_site(site) if type(site) == str else site_name for site in site_name]
|
||||||
|
|
||||||
country_enum = Country.from_string(country_indeed)
|
country_enum = Country.from_string(country_indeed)
|
||||||
|
|
||||||
site_type = [site_name] if type(site_name) == Site else site_name
|
|
||||||
scraper_input = ScraperInput(
|
scraper_input = ScraperInput(
|
||||||
site_type=site_type,
|
site_type=site_type,
|
||||||
country=country_enum,
|
country=country_enum,
|
||||||
|
@ -122,7 +123,6 @@ def scrape_jobs(
|
||||||
errors_list = [(key, value) for key, value in errors.items()]
|
errors_list = [(key, value) for key, value in errors.items()]
|
||||||
errors_df = pd.DataFrame(errors_list, columns=["Site", "Error"])
|
errors_df = pd.DataFrame(errors_list, columns=["Site", "Error"])
|
||||||
|
|
||||||
|
|
||||||
if dfs:
|
if dfs:
|
||||||
df = pd.concat(dfs, ignore_index=True)
|
df = pd.concat(dfs, ignore_index=True)
|
||||||
if hyperlinks:
|
if hyperlinks:
|
||||||
|
|
|
@ -78,9 +78,7 @@ class IndeedScraper(Scraper):
|
||||||
raise StatusException(response.status_code)
|
raise StatusException(response.status_code)
|
||||||
|
|
||||||
soup = BeautifulSoup(response.content, "html.parser")
|
soup = BeautifulSoup(response.content, "html.parser")
|
||||||
with open("text2.html", "w", encoding="utf-8") as f:
|
if "did not match any jobs" in response.text:
|
||||||
f.write(str(soup))
|
|
||||||
if "did not match any jobs" in str(soup):
|
|
||||||
raise ParsingException("Search did not match any jobs")
|
raise ParsingException("Search did not match any jobs")
|
||||||
|
|
||||||
jobs = IndeedScraper.parse_jobs(
|
jobs = IndeedScraper.parse_jobs(
|
||||||
|
|
|
@ -0,0 +1,10 @@
|
||||||
|
from ..jobspy import scrape_jobs
|
||||||
|
|
||||||
|
|
||||||
|
def test_all():
|
||||||
|
result = scrape_jobs(
|
||||||
|
site_name=["linkedin", "indeed", "zip_recruiter"],
|
||||||
|
search_term="software engineer",
|
||||||
|
results_wanted=5,
|
||||||
|
)
|
||||||
|
assert result is not None and result.errors.empty is True
|
|
@ -6,4 +6,4 @@ def test_indeed():
|
||||||
site_name="indeed",
|
site_name="indeed",
|
||||||
search_term="software engineer",
|
search_term="software engineer",
|
||||||
)
|
)
|
||||||
assert result is not None
|
assert result is not None and result.errors.empty is True
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from jobspy import scrape_jobs
|
from ..jobspy import scrape_jobs
|
||||||
|
|
||||||
|
|
||||||
def test_linkedin():
|
def test_linkedin():
|
||||||
|
@ -6,4 +6,4 @@ def test_linkedin():
|
||||||
site_name="linkedin",
|
site_name="linkedin",
|
||||||
search_term="software engineer",
|
search_term="software engineer",
|
||||||
)
|
)
|
||||||
assert result is not None
|
assert result is not None and result.errors.empty is True
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from jobspy import scrape_jobs
|
from ..jobspy import scrape_jobs
|
||||||
|
|
||||||
|
|
||||||
def test_ziprecruiter():
|
def test_ziprecruiter():
|
||||||
|
@ -7,4 +7,4 @@ def test_ziprecruiter():
|
||||||
search_term="software engineer",
|
search_term="software engineer",
|
||||||
)
|
)
|
||||||
|
|
||||||
assert result is not None
|
assert result is not None and result.errors.empty is True
|
||||||
|
|
Loading…
Reference in New Issue