filter works

pull/231/head
Yariv Menachem 2024-12-19 13:25:55 +02:00
parent 02a838f8ba
commit c105c31bab
1 changed file with 21 additions and 18 deletions

View File

@@ -3,15 +3,18 @@ import re
from jobspy import Site, scrape_jobs from jobspy import Site, scrape_jobs
from jobspy.db.job_repository import JobRepository from jobspy.db.job_repository import JobRepository
from jobspy.jobs import JobPost from jobspy.jobs import JobPost
from jobspy.scrapers.utils import create_logger
from jobspy.telegram_bot import TelegramBot from jobspy.telegram_bot import TelegramBot
logger = create_logger("Main")
filter_by_title: list[str] = ["test", "qa", "Lead", "Full Stack", "Fullstack", "Frontend" filter_by_title: list[str] = ["test", "qa", "Lead", "Full Stack", "Fullstack", "Frontend"
"data", "automation", "BI", "Principal"] "data", "automation", "BI", "Principal"]
def filter_jobs_by_title_name(job: JobPost): def filter_jobs_by_title_name(job):
for filter_title in filter_by_title: for filter_title in filter_by_title:
if re.search(filter_title, job.title, re.IGNORECASE): if re.search(filter_title, job.title, re.IGNORECASE):
logger.info(f"job filtered out by title: {job.id} , {job.title}")
return False return False
return True return True
@@ -20,25 +23,25 @@ def filter_jobs_by_title_name(job: JobPost):
async def main(): async def main():
telegramBot = TelegramBot() telegramBot = TelegramBot()
jobRepository = JobRepository() jobRepository = JobRepository()
# sites_to_scrap = [Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI]
sites_to_scrap = [Site.GLASSDOOR]
for site in sites_to_scrap:
jobs = scrape_jobs(
site_name=[site],
search_term="software engineer",
google_search_term="software engineer jobs near Tel Aviv Israel since yesterday",
locations=["Ramat Gan, Israel"],
results_wanted=200,
hours_old=200,
country_indeed='israel',
)
logger.info(f"Found {len(jobs)} jobs")
jobs = list(filter(filter_jobs_by_title_name, jobs))
jobs = scrape_jobs( newJobs = jobRepository.insertManyIfNotFound(jobs)
site_name=[Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED],
# site_name=[Site.GOOZALI],
search_term="software engineer",
google_search_term="software engineer jobs near Tel Aviv Israel since yesterday",
locations=["Tel Aviv, Israel", "Ramat Gan, Israel",
"Central, Israel", "Rehovot ,Israel"],
results_wanted=200,
hours_old=200,
country_indeed='israel',
)
print(f"Found {len(jobs)} jobs")
job = filter(filter_jobs_by_title_name, jobs)
newJobs = jobRepository.insertManyIfNotFound(jobs) for newJob in newJobs:
await telegramBot.sendJob(newJob)
for newJob in newJobs:
await telegramBot.sendJob(newJob)
# Run the async main function # Run the async main function
if __name__ == "__main__": if __name__ == "__main__":