From 1834889d9df0a78ed9c13597378109c629a0be71 Mon Sep 17 00:00:00 2001 From: yiwen Date: Sun, 1 Oct 2023 16:41:28 +0800 Subject: [PATCH] error log --- examples/JobSpy_Demo.py | 31 +++++++++++++++----------- src/jobspy/scrapers/indeed/__init__.py | 1 + 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/examples/JobSpy_Demo.py b/examples/JobSpy_Demo.py index 1bf382a..df7ae41 100644 --- a/examples/JobSpy_Demo.py +++ b/examples/JobSpy_Demo.py @@ -21,19 +21,24 @@ pd.set_option('display.max_colwidth', 50) # set to 0 to see full job url / desc # fetch jobs for each location locations = read_location_list('location.json') for location in locations: - jobs: pd.DataFrame = scrape_jobs( - # site_name=["indeed", "linkedin", "zip_recruiter"], - site_name=["indeed"], - search_term="software engineer", - location=location, - results_wanted=30, - # be wary the higher it is, the more likey you'll get blocked (rotating proxy should work tho) - country_indeed='USA', - # offset=25 # start jobs from an offset (use if search failed and want to continue) - proxy="http://34.120.172.140:8123", - # proxy="http://crawler-gost-proxy.jobright-internal.com:8080", - ) - + try: + jobs: pd.DataFrame = scrape_jobs( + # site_name=["indeed", "linkedin", "zip_recruiter"], + site_name=["indeed"], + search_term="software engineer", + location=location, + results_wanted=30, + # be wary the higher it is, the more likey you'll get blocked (rotating proxy should work tho) + country_indeed='USA', + # offset=25 # start jobs from an offset (use if search failed and want to continue) + proxy="http://34.120.172.140:8123", + # proxy="http://crawler-gost-proxy.jobright-internal.com:8080", + ) + except Exception as e: + print(f'Error when process: {location}') + print(e) + continue + print(f'{location}: {jobs.shape[0]} rows append.') if os.path.isfile('./jobs.csv'): jobs.to_csv('./jobs.csv', index=False, mode='a', header=False) else: diff --git a/src/jobspy/scrapers/indeed/__init__.py b/src/jobspy/scrapers/indeed/__init__.py index 235147f..4c2a677 100644 --- a/src/jobspy/scrapers/indeed/__init__.py +++ b/src/jobspy/scrapers/indeed/__init__.py @@ -28,6 +28,7 @@ from ...jobs import ( ) from .. import Scraper, ScraperInput, Site + def extract_emails_from_text(text: str) -> Optional[list[str]]: if not text: return None