Merge pull request #42 from cullenwatson/fix/class-type-error

- refactor & #41 bug fix
2026-03-05 03:54:31 -08:00 · 2023-09-06 16:33:59 -07:00 · 2023-09-06 16:32:51 -07:00 · 2023-09-06 11:34:51 -05:00 · 2023-09-06 11:32:32 -05:00 · 2023-09-06 11:26:55 -05:00
9 changed files with 55 additions and 1019 deletions
--- a/JobSpy_Demo.ipynb
+++ b/JobSpy_Demo.ipynb
--- a/README.md
+++ b/README.md
@@ -7,6 +7,10 @@
 - Scrapes job postings from **LinkedIn**, **Indeed** & **ZipRecruiter** simultaneously
 - Aggregates the job postings in a Pandas DataFrame
  
+[Video Guide for JobSpy](https://www.youtube.com/watch?v=-yS3mgI5H-4)
+
+
+
 ![jobspy](https://github.com/cullenwatson/JobSpy/assets/78247585/ec7ef355-05f6-4fd3-8161-a817e31c5c57)
  
 ### Installation
@@ -22,31 +26,30 @@ pip install python-jobspy
 from jobspy import scrape_jobs
 import pandas as pd

-jobs: pd.DataFrame = scrape_jobs(
+result: pd.DataFrame = scrape_jobs(
    site_name=["indeed", "linkedin", "zip_recruiter"],
    search_term="software engineer",
    location="Dallas, TX",
    results_wanted=10,
    
-    country='USA' # only needed for indeed
+    country_indeed='USA' # only needed for indeed
 )

-if jobs.empty:
-    print("No jobs found.")
-else:
 pd.set_option('display.max_columns', None)
 pd.set_option('display.max_rows', None)
 pd.set_option('display.width', None)
 pd.set_option('display.max_colwidth', 50)  # set to 0 to see full job url / desc

 #1 output
-    print(jobs)
+print(result.jobs)
+print(result.errors)

 #2 display in Jupyter Notebook
-    #display(jobs)
+#display(result.jobs)
+#display(result.errors)

 #3 output to .csv
-    #jobs.to_csv('jobs.csv', index=False)
+#result.jobs.to_csv('result.jobs.csv', index=False)
 ```

 ### Output
@@ -71,7 +74,7 @@ Optional
 ├── is_remote (bool)
 ├── results_wanted (int): number of job results to retrieve for each site specified in 'site_name'
 ├── easy_apply (bool): filters for jobs that are hosted on LinkedIn
-├── country (enum): filters the country on Indeed
+├── country_indeed (enum): filters the country on Indeed
 ```

 
@@ -108,7 +111,7 @@ ZipRecruiter searches for jobs in US/Canada & uses only the `location` parameter


 ### **Indeed**
-For Indeed, the `country` parameter is required. Additionally, use the `location` parameter and include the city or state if necessary.
+For Indeed, the `country_indeed` parameter is required. Additionally, use the `location` parameter and include the city or state if necessary.

 You can specify the following countries when searching on Indeed (use the exact name): 

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "python-jobspy"
-version = "1.1.1"
+version = "1.1.2"
 description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
 authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
 readme = "README.md"
--- a/src/jobspy/init.py
+++ b/src/jobspy/init.py
@@ -26,7 +26,7 @@ def _map_str_to_site(site_name: str) -> Site:


 def scrape_jobs(
-    site_name: str | Site | List[Site],
+    site_name: str | List[str] | Site | List[Site],
    search_term: str,
    location: str = "",
    distance: int = None,
@@ -43,11 +43,12 @@ def scrape_jobs(
    """

    if type(site_name) == str:
-        site_name = _map_str_to_site(site_name)
+        site_type = [_map_str_to_site(site_name)]
+    else:  #: if type(site_name) == list
+        site_type = [_map_str_to_site(site) if type(site) == str else site_name for site in site_name]

    country_enum = Country.from_string(country_indeed)

-    site_type = [site_name] if type(site_name) == Site else site_name
    scraper_input = ScraperInput(
        site_type=site_type,
        country=country_enum,
@@ -122,7 +123,6 @@ def scrape_jobs(
    errors_list = [(key, value) for key, value in errors.items()]
    errors_df = pd.DataFrame(errors_list, columns=["Site", "Error"])

-
    if dfs:
        df = pd.concat(dfs, ignore_index=True)
        if hyperlinks:
--- a/src/jobspy/scrapers/indeed/init.py
+++ b/src/jobspy/scrapers/indeed/init.py
@@ -78,9 +78,7 @@ class IndeedScraper(Scraper):
            raise StatusException(response.status_code)

        soup = BeautifulSoup(response.content, "html.parser")
-        with open("text2.html", "w", encoding="utf-8") as f:
-            f.write(str(soup))
-        if "did not match any jobs" in str(soup):
+        if "did not match any jobs" in response.text:
            raise ParsingException("Search did not match any jobs")

        jobs = IndeedScraper.parse_jobs(
--- a/src/tests/test_all.py
+++ b/src/tests/test_all.py
@@ -0,0 +1,10 @@
+from ..jobspy import scrape_jobs
+
+
+def test_all():
+    result = scrape_jobs(
+        site_name=["linkedin", "indeed", "zip_recruiter"],
+        search_term="software engineer",
+        results_wanted=5,
+    )
+    assert result is not None and result.errors.empty is True
--- a/src/tests/test_indeed.py
+++ b/src/tests/test_indeed.py
@@ -6,4 +6,4 @@ def test_indeed():
        site_name="indeed",
        search_term="software engineer",
    )
-    assert result is not None
+    assert result is not None and result.errors.empty is True
--- a/src/tests/test_linkedin.py
+++ b/src/tests/test_linkedin.py
@@ -1,4 +1,4 @@
-from jobspy import scrape_jobs
+from ..jobspy import scrape_jobs


 def test_linkedin():
@@ -6,4 +6,4 @@ def test_linkedin():
        site_name="linkedin",
        search_term="software engineer",
    )
-    assert result is not None
+    assert result is not None and result.errors.empty is True
--- a/src/tests/test_ziprecruiter.py
+++ b/src/tests/test_ziprecruiter.py
@@ -1,4 +1,4 @@
-from jobspy import scrape_jobs
+from ..jobspy import scrape_jobs


 def test_ziprecruiter():
@@ -7,4 +7,4 @@ def test_ziprecruiter():
        search_term="software engineer",
    )

-    assert result is not None
+    assert result is not None and result.errors.empty is True
Author	SHA1	Message	Date
Zachary Hampton	a37e7f235e	Merge pull request #42 from cullenwatson/fix/class-type-error - refactor & #41 bug fix	2023-09-06 16:33:59 -07:00
Zachary Hampton	690739e858	- refactor & #41 bug fix	2023-09-06 16:32:51 -07:00
Cullen Watson	43eb2fe0e8	remove gitattr	2023-09-06 11:34:51 -05:00
Cullen Watson	e50227bba6	clear output jupyter	2023-09-06 11:32:32 -05:00
Cullen Watson	45c2d76e15	add yt guide	2023-09-06 11:26:55 -05:00