mirror of https://github.com/Bunsly/JobSpy
parent
e5353e604d
commit
90fa4a4c4f
|
@ -0,0 +1,44 @@
|
|||
import re
|
||||
import tls_client
|
||||
|
||||
|
||||
def count_urgent_words(description: str) -> int:
|
||||
"""
|
||||
Count the number of urgent words or phrases in a job description.
|
||||
"""
|
||||
urgent_patterns = re.compile(
|
||||
r"\burgen(t|cy)|\bimmediate(ly)?\b|start asap|\bhiring (now|immediate(ly)?)\b",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
matches = re.findall(urgent_patterns, description)
|
||||
count = len(matches)
|
||||
|
||||
return count
|
||||
|
||||
|
||||
def extract_emails_from_text(text: str) -> list[str] | None:
|
||||
if not text:
|
||||
return None
|
||||
email_regex = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
|
||||
return email_regex.findall(text)
|
||||
|
||||
|
||||
def create_session(proxy: str | None = None):
|
||||
"""
|
||||
Creates a tls client session
|
||||
|
||||
:return: A session object with or without proxies.
|
||||
"""
|
||||
session = tls_client.Session(
|
||||
client_identifier="chrome112",
|
||||
random_tls_extension_order=True,
|
||||
)
|
||||
session.proxies = proxy
|
||||
# TODO multiple proxies
|
||||
# if self.proxies:
|
||||
# session.proxies = {
|
||||
# "http": random.choice(self.proxies),
|
||||
# "https": random.choice(self.proxies),
|
||||
# }
|
||||
|
||||
return session
|
Loading…
Reference in New Issue