Compare commits

...

5 Commits

Author SHA1 Message Date
Zachary Hampton
a37e7f235e Merge pull request #42 from cullenwatson/fix/class-type-error
- refactor & #41 bug fix
2023-09-06 16:33:59 -07:00
Zachary Hampton
690739e858 - refactor & #41 bug fix 2023-09-06 16:32:51 -07:00
Cullen Watson
43eb2fe0e8 remove gitattr 2023-09-06 11:34:51 -05:00
Cullen Watson
e50227bba6 clear output jupyter 2023-09-06 11:32:32 -05:00
Cullen Watson
45c2d76e15 add yt guide 2023-09-06 11:26:55 -05:00
9 changed files with 55 additions and 1019 deletions

File diff suppressed because one or more lines are too long

View File

@@ -7,6 +7,10 @@
- Scrapes job postings from **LinkedIn**, **Indeed** & **ZipRecruiter** simultaneously - Scrapes job postings from **LinkedIn**, **Indeed** & **ZipRecruiter** simultaneously
- Aggregates the job postings in a Pandas DataFrame - Aggregates the job postings in a Pandas DataFrame
[Video Guide for JobSpy](https://www.youtube.com/watch?v=-yS3mgI5H-4)
![jobspy](https://github.com/cullenwatson/JobSpy/assets/78247585/ec7ef355-05f6-4fd3-8161-a817e31c5c57) ![jobspy](https://github.com/cullenwatson/JobSpy/assets/78247585/ec7ef355-05f6-4fd3-8161-a817e31c5c57)
### Installation ### Installation
@@ -22,31 +26,30 @@ pip install python-jobspy
from jobspy import scrape_jobs from jobspy import scrape_jobs
import pandas as pd import pandas as pd
jobs: pd.DataFrame = scrape_jobs( result: pd.DataFrame = scrape_jobs(
site_name=["indeed", "linkedin", "zip_recruiter"], site_name=["indeed", "linkedin", "zip_recruiter"],
search_term="software engineer", search_term="software engineer",
location="Dallas, TX", location="Dallas, TX",
results_wanted=10, results_wanted=10,
country='USA' # only needed for indeed country_indeed='USA' # only needed for indeed
) )
if jobs.empty:
print("No jobs found.")
else:
pd.set_option('display.max_columns', None) pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None) pd.set_option('display.max_rows', None)
pd.set_option('display.width', None) pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', 50) # set to 0 to see full job url / desc pd.set_option('display.max_colwidth', 50) # set to 0 to see full job url / desc
#1 output #1 output
print(jobs) print(result.jobs)
print(result.errors)
#2 display in Jupyter Notebook #2 display in Jupyter Notebook
#display(jobs) #display(result.jobs)
#display(result.errors)
#3 output to .csv #3 output to .csv
#jobs.to_csv('jobs.csv', index=False) #result.jobs.to_csv('result.jobs.csv', index=False)
``` ```
### Output ### Output
@@ -71,7 +74,7 @@ Optional
├── is_remote (bool) ├── is_remote (bool)
├── results_wanted (int): number of job results to retrieve for each site specified in 'site_type' ├── results_wanted (int): number of job results to retrieve for each site specified in 'site_type'
├── easy_apply (bool): filters for jobs that are hosted on LinkedIn ├── easy_apply (bool): filters for jobs that are hosted on LinkedIn
├── country (enum): filters the country on Indeed ├── country_indeed (enum): filters the country on Indeed
``` ```
@@ -108,7 +111,7 @@ ZipRecruiter searches for jobs in US/Canada & uses only the `location` parameter
### **Indeed** ### **Indeed**
For Indeed, the `country` parameter is required. Additionally, use the `location` parameter and include the city or state if necessary. For Indeed, the `country_indeed` parameter is required. Additionally, use the `location` parameter and include the city or state if necessary.
You can specify the following countries when searching on Indeed (use the exact name): You can specify the following countries when searching on Indeed (use the exact name):

View File

@@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "python-jobspy" name = "python-jobspy"
version = "1.1.1" version = "1.1.2"
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter" description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"] authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
readme = "README.md" readme = "README.md"

View File

@@ -26,7 +26,7 @@ def _map_str_to_site(site_name: str) -> Site:
def scrape_jobs( def scrape_jobs(
site_name: str | Site | List[Site], site_name: str | List[str] | Site | List[Site],
search_term: str, search_term: str,
location: str = "", location: str = "",
distance: int = None, distance: int = None,
@@ -43,11 +43,12 @@ def scrape_jobs(
""" """
if type(site_name) == str: if type(site_name) == str:
site_name = _map_str_to_site(site_name) site_type = [_map_str_to_site(site_name)]
else: #: if type(site_name) == list
site_type = [_map_str_to_site(site) if type(site) == str else site_name for site in site_name]
country_enum = Country.from_string(country_indeed) country_enum = Country.from_string(country_indeed)
site_type = [site_name] if type(site_name) == Site else site_name
scraper_input = ScraperInput( scraper_input = ScraperInput(
site_type=site_type, site_type=site_type,
country=country_enum, country=country_enum,
@@ -122,7 +123,6 @@ def scrape_jobs(
errors_list = [(key, value) for key, value in errors.items()] errors_list = [(key, value) for key, value in errors.items()]
errors_df = pd.DataFrame(errors_list, columns=["Site", "Error"]) errors_df = pd.DataFrame(errors_list, columns=["Site", "Error"])
if dfs: if dfs:
df = pd.concat(dfs, ignore_index=True) df = pd.concat(dfs, ignore_index=True)
if hyperlinks: if hyperlinks:

View File

@@ -78,9 +78,7 @@ class IndeedScraper(Scraper):
raise StatusException(response.status_code) raise StatusException(response.status_code)
soup = BeautifulSoup(response.content, "html.parser") soup = BeautifulSoup(response.content, "html.parser")
with open("text2.html", "w", encoding="utf-8") as f: if "did not match any jobs" in response.text:
f.write(str(soup))
if "did not match any jobs" in str(soup):
raise ParsingException("Search did not match any jobs") raise ParsingException("Search did not match any jobs")
jobs = IndeedScraper.parse_jobs( jobs = IndeedScraper.parse_jobs(

10
src/tests/test_all.py Normal file
View File

@@ -0,0 +1,10 @@
from ..jobspy import scrape_jobs
def test_all():
    """Scrape LinkedIn, Indeed and ZipRecruiter in one call and expect no errors.

    Requests five "software engineer" results and checks that a result
    object comes back and that its ``errors`` attribute reports empty.
    """
    sites = ["linkedin", "indeed", "zip_recruiter"]
    outcome = scrape_jobs(
        site_name=sites,
        search_term="software engineer",
        results_wanted=5,
    )

    # Checked separately so a failure shows which condition broke.
    assert outcome is not None
    assert outcome.errors.empty is True

View File

@@ -6,4 +6,4 @@ def test_indeed():
site_name="indeed", site_name="indeed",
search_term="software engineer", search_term="software engineer",
) )
assert result is not None assert result is not None and result.errors.empty is True

View File

@@ -1,4 +1,4 @@
from jobspy import scrape_jobs from ..jobspy import scrape_jobs
def test_linkedin(): def test_linkedin():
@@ -6,4 +6,4 @@ def test_linkedin():
site_name="linkedin", site_name="linkedin",
search_term="software engineer", search_term="software engineer",
) )
assert result is not None assert result is not None and result.errors.empty is True

View File

@@ -1,4 +1,4 @@
from jobspy import scrape_jobs from ..jobspy import scrape_jobs
def test_ziprecruiter(): def test_ziprecruiter():
@@ -7,4 +7,4 @@ def test_ziprecruiter():
search_term="software engineer", search_term="software engineer",
) )
assert result is not None assert result is not None and result.errors.empty is True