error log

pull/53/head
yiwen 2023-10-01 16:41:28 +08:00
parent 6d2cdcf813
commit 1834889d9d
2 changed files with 19 additions and 13 deletions

View File

@ -21,19 +21,24 @@ pd.set_option('display.max_colwidth', 50) # set to 0 to see full job url / desc
# fetch jobs for each location # fetch jobs for each location
locations = read_location_list('location.json') locations = read_location_list('location.json')
for location in locations: for location in locations:
jobs: pd.DataFrame = scrape_jobs( try:
# site_name=["indeed", "linkedin", "zip_recruiter"], jobs: pd.DataFrame = scrape_jobs(
site_name=["indeed"], # site_name=["indeed", "linkedin", "zip_recruiter"],
search_term="software engineer", site_name=["indeed"],
location=location, search_term="software engineer",
results_wanted=30, location=location,
# be wary the higher it is, the more likey you'll get blocked (rotating proxy should work tho) results_wanted=30,
country_indeed='USA', # be wary the higher it is, the more likey you'll get blocked (rotating proxy should work tho)
# offset=25 # start jobs from an offset (use if search failed and want to continue) country_indeed='USA',
proxy="http://34.120.172.140:8123", # offset=25 # start jobs from an offset (use if search failed and want to continue)
# proxy="http://crawler-gost-proxy.jobright-internal.com:8080", proxy="http://34.120.172.140:8123",
) # proxy="http://crawler-gost-proxy.jobright-internal.com:8080",
)
except Exception as e:
print(f'Error when process: {location}')
print(e)
continue
print(f'{location}: {jobs.shape[0]} rows append.')
if os.path.isfile('./jobs.csv'): if os.path.isfile('./jobs.csv'):
jobs.to_csv('./jobs.csv', index=False, mode='a', header=False) jobs.to_csv('./jobs.csv', index=False, mode='a', header=False)
else: else:

View File

@ -28,6 +28,7 @@ from ...jobs import (
) )
from .. import Scraper, ScraperInput, Site from .. import Scraper, ScraperInput, Site
def extract_emails_from_text(text: str) -> Optional[list[str]]: def extract_emails_from_text(text: str) -> Optional[list[str]]:
if not text: if not text:
return None return None