Added a filter on job title names to make the results more accurate

pull/231/head
Yariv Menachem 2024-12-19 13:12:39 +02:00
parent d30d58d7f4
commit 3cabbcff95
1 changed files with 16 additions and 4 deletions

View File

@ -1,20 +1,31 @@
import asyncio
import re
from jobspy import Site, scrape_jobs
from jobspy.db.job_repository import JobRepository
from jobspy.jobs import JobPost
from jobspy.telegram_bot import TelegramBot
# Title fragments used by filter_jobs_by_title_name: a job whose title
# contains any of these (case-insensitively) is excluded.
# Every entry needs its own trailing comma: adjacent string literals are
# silently concatenated, which previously merged "Frontend" and "data" into
# the single, never-matching term "Frontenddata".
filter_by_title: list[str] = [
    "test",
    "qa",
    "Lead",
    "Full Stack",
    "Fullstack",
    "Frontend",
    "data",
    "automation",
    "BI",
    "Principal",
]
def filter_jobs_by_title_name(job: JobPost) -> bool:
    """Return whether a job should be kept based on its title.

    A job is rejected when its title contains any entry of
    ``filter_by_title`` as a case-insensitive substring.

    Args:
        job: The job posting to check; only ``job.title`` is read.

    Returns:
        False if any filter term occurs in the title, True otherwise.
    """
    # re.escape makes every term match literally, so a term that contains
    # regex metacharacters (e.g. "C++") cannot raise re.error or over-match.
    # NOTE(review): matching is by substring, so short terms such as "BI"
    # also reject titles like "Mobile Developer" -- confirm this is intended.
    return not any(
        re.search(re.escape(term), job.title, re.IGNORECASE)
        for term in filter_by_title
    )
async def main():
telegramBot = TelegramBot()
jobRepository = JobRepository()
jobs = scrape_jobs(
# site_name=[Site.LINKEDIN, Site.GOOZALI, Site.GLASSDOOR, Site.INDEED],
site_name=[Site.GOOZALI],
site_name=[Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED],
# site_name=[Site.GOOZALI],
search_term="software engineer",
google_search_term="software engineer jobs near Tel Aviv Israel since yesterday",
location="Central, Israel",
# locations=["Rehovot"],
locations=["Tel Aviv, Israel", "Ramat Gan, Israel",
"Central, Israel", "Rehovot ,Israel"],
results_wanted=200,
@ -22,6 +33,7 @@ async def main():
country_indeed='israel',
)
print(f"Found {len(jobs)} jobs")
job = filter(filter_jobs_by_title_name, jobs)
newJobs = jobRepository.insertManyIfNotFound(jobs)