[fix] util func

pull/54/head v1.1.10
Cullen Watson 2023-09-28 18:33:14 -05:00
parent c676050dc0
commit bff39a2625
4 changed files with 27 additions and 4 deletions

View File

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "python-jobspy" name = "python-jobspy"
version = "1.1.9" version = "1.1.10"
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter" description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"] authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
homepage = "https://github.com/cullenwatson/JobSpy" homepage = "https://github.com/cullenwatson/JobSpy"

View File

@ -27,7 +27,12 @@ from ...jobs import (
JobType, JobType,
) )
from .. import Scraper, ScraperInput, Site from .. import Scraper, ScraperInput, Site
from ...utils import extract_emails_from_text
def extract_emails_from_text(text: str) -> Optional[list[str]]:
    """Collect every e-mail-looking substring found in *text*.

    :param text: free-form text to scan
    :return: ``None`` when *text* is falsy (empty or ``None``); otherwise the
        list of matches in order of appearance, which is empty when nothing
        matches
    """
    if text:
        return re.findall(
            r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", text
        )
    return None
class IndeedScraper(Scraper): class IndeedScraper(Scraper):

View File

@ -9,6 +9,7 @@ from datetime import datetime
import requests import requests
import time import time
import re
from requests.exceptions import ProxyError from requests.exceptions import ProxyError
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@ -23,7 +24,13 @@ from ...jobs import (
JobResponse, JobResponse,
JobType, JobType,
) )
from ...utils import extract_emails_from_text
def extract_emails_from_text(text: str) -> Optional[list[str]]:
    """Return the e-mail-like tokens contained in *text*.

    A falsy *text* (empty string or ``None``) yields ``None``. Any other
    input yields the list of regex matches, possibly empty.
    """
    return (
        re.findall(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", text)
        if text
        else None
    )
class LinkedInScraper(Scraper): class LinkedInScraper(Scraper):
@ -256,3 +263,9 @@ class LinkedInScraper(Scraper):
) )
return location return location
def extract_emails_from_text(text: str) -> Optional[list[str]]:
    """Scan *text* for e-mail-shaped substrings.

    :param text: text to search
    :return: ``None`` for falsy input, else a list of all matches (an empty
        list when there are none)
    """
    # Nothing to scan: report "no input" as None rather than an empty list.
    if not text:
        return None

    address_pattern = re.compile(
        r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    )
    found = address_pattern.findall(text)
    return found

View File

@ -28,7 +28,12 @@ from ...jobs import (
JobType, JobType,
Country, Country,
) )
from ...utils import extract_emails_from_text
def extract_emails_from_text(text: str) -> Optional[list[str]]:
    """Find all e-mail-like strings inside *text*.

    Returns ``None`` if *text* is falsy; otherwise returns the matches as a
    list, which is empty when the text contains no match.
    """
    if not text:
        return None
    find_addresses = re.compile(
        r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    ).findall
    return find_addresses(text)
class ZipRecruiterScraper(Scraper): class ZipRecruiterScraper(Scraper):