diff --git a/.gitignore b/.gitignore index b845ce8..e038225 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ /venv/ /ven/ **/__pycache__/ +**/.pytest_cache/ *.pyc .env dist diff --git a/JobSpy_Demo.ipynb b/JobSpy_Demo.ipynb index 348259c..98b74ad 100644 --- a/JobSpy_Demo.ipynb +++ b/JobSpy_Demo.ipynb @@ -1,12 +1,34 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "00a94b47-f47b-420f-ba7e-714ef219c006", + "metadata": {}, + "outputs": [], + "source": [ + "from jobspy import scrape_jobs\n", + "import pandas as pd" + ] + }, { "cell_type": "code", "execution_count": 2, - "id": "c3f21577-477d-451e-9914-5d67e8a89075", - "metadata": { - "scrolled": true - }, + "id": "9f773e6c-d9fc-42cc-b0ef-63b739e78435", + "metadata": {}, + "outputs": [], + "source": [ + "pd.set_option('display.max_columns', None)\n", + "pd.set_option('display.max_rows', None)\n", + "pd.set_option('display.width', None)\n", + "pd.set_option('display.max_colwidth', 50)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "1253c1f8-9437-492e-9dd3-e7fe51099420", + "metadata": {}, "outputs": [ { "data": { @@ -46,20 +68,104 @@ " \n", " 0\n", " indeed\n", - " Firmware Engineer\n", - " Advanced Motion Controls\n", - " Camarillo\n", - " CA\n", + " Mental Health Therapist\n", + " Sandstone Care\n", + " Broomfield\n", + " CO\n", " fulltime\n", " yearly\n", - " 145000\n", - " 110000\n", - " https://www.indeed.com/viewjob?jk=a2e7077fdd3c...\n", - " We are looking for an experienced Firmware Eng...\n", + " 68000\n", + " 57500\n", + " https://www.indeed.com/viewjob?jk=f5f33d72e030...\n", + " Mental Health Therapist- Broomfield, CO Locati...\n", " \n", " \n", " 1\n", " indeed\n", + " .NET Developer\n", + " Noir Consulting\n", + " Irving\n", + " TX\n", + " None\n", + " yearly\n", + " 200000\n", + " 200000\n", + " https://www.indeed.com/viewjob?jk=1b22ba65296c...\n", + " .NET Software Engineer, C#, WPF - Irving (Tech...\n", + " \n", + " \n", + " 2\n", + " indeed\n", + " 
Senior Software Engineer\n", + " Johns Hopkins Applied Physics Laboratory (APL)\n", + " Laurel\n", + " MD\n", + " None\n", + " None\n", + " None\n", + " None\n", + " https://www.indeed.com/viewjob?jk=309eed270a88...\n", + " Description Are you a communications systems d...\n", + " \n", + " \n", + " 3\n", + " indeed\n", + " Front End Developer\n", + " Verkada\n", + " San Mateo\n", + " CA\n", + " fulltime\n", + " yearly\n", + " 285000\n", + " 120000\n", + " https://www.indeed.com/viewjob?jk=a3ea45daca75...\n", + " Who We Are Verkada is the largest cloud-based ...\n", + " \n", + " \n", + " 4\n", + " indeed\n", + " Software Engineer\n", + " Adobe\n", + " San Jose\n", + " CA\n", + " fulltime\n", + " yearly\n", + " 142700\n", + " 73200\n", + " https://www.indeed.com/viewjob?jk=0f2dc9901fc7...\n", + " Our Company Changing the world through digital...\n", + " \n", + " \n", + " 5\n", + " indeed\n", + " Full Stack Developer\n", + " Comcast\n", + " Philadelphia\n", + " PA\n", + " fulltime\n", + " yearly\n", + " 184663\n", + " 78789\n", + " https://www.indeed.com/viewjob?jk=eb5c927221eb...\n", + " Make your mark at Comcast - a Fortune 30 globa...\n", + " \n", + " \n", + " 6\n", + " indeed\n", + " Senior Software Engineer\n", + " Smart City Solutions\n", + " \n", + " FL\n", + " fulltime\n", + " yearly\n", + " 100000\n", + " 85000\n", + " https://www.indeed.com/viewjob?jk=ba1945f143a1...\n", + " Smart City hiring a full stack software develo...\n", + " \n", + " \n", + " 7\n", + " indeed\n", " Computer Engineer\n", " Honeywell\n", " \n", @@ -72,116 +178,32 @@ " Join a team recognized for leadership, innovat...\n", " \n", " \n", - " 2\n", - " indeed\n", - " Software Engineer\n", - " Splunk\n", - " Remote\n", - " None\n", - " fulltime\n", - " yearly\n", - " 159500\n", - " 116000\n", - " https://www.indeed.com/viewjob?jk=155495ca3f46...\n", - " A little about us. 
Splunk is the key to enterp...\n", - " \n", - " \n", - " 3\n", - " indeed\n", - " Development Operations Engineer\n", - " Stratacache\n", - " Dayton\n", - " OH\n", - " fulltime\n", - " yearly\n", - " 90000\n", - " 83573\n", - " https://www.indeed.com/viewjob?jk=77cf3540c06e...\n", - " Stratacache, Inc. delivers in-store retail exp...\n", - " \n", - " \n", - " 4\n", - " indeed\n", - " Computer Engineer\n", - " Honeywell\n", - " \n", - " None\n", - " fulltime\n", - " None\n", - " None\n", - " None\n", - " https://www.indeed.com/viewjob?jk=7fadbb7c936f...\n", - " Join a team recognized for leadership, innovat...\n", - " \n", - " \n", - " 5\n", - " indeed\n", - " Full Stack Developer\n", - " Reinventing Geospatial, Inc. (RGi)\n", - " Herndon\n", - " VA\n", - " fulltime\n", - " None\n", - " None\n", - " None\n", - " https://www.indeed.com/viewjob?jk=11b2b5b0dd44...\n", - " Job Highlights As a Full Stack Software Engine...\n", - " \n", - " \n", - " 6\n", - " indeed\n", - " Software Engineer\n", - " Workiva\n", - " Remote\n", - " None\n", - " None\n", - " yearly\n", - " 134000\n", - " 79000\n", - " https://www.indeed.com/viewjob?jk=ec3ab6eb9253...\n", - " Are you ready to embark on an exciting journey...\n", - " \n", - " \n", - " 7\n", - " indeed\n", - " Senior Software Engineer\n", - " SciTec\n", - " Boulder\n", - " CO\n", - " fulltime\n", - " yearly\n", - " 164000\n", - " 93000\n", - " https://www.indeed.com/viewjob?jk=781e4cf0cf6d...\n", - " SciTec has been awarded multiple government co...\n", - " \n", - " \n", " 8\n", " indeed\n", " Software Engineer\n", - " Microsoft\n", - " \n", + " Fidelity Investments\n", + " Westlake\n", + " TX\n", " None\n", - " fulltime\n", - " yearly\n", - " 182600\n", - " 94300\n", - " https://www.indeed.com/viewjob?jk=21e05b9e9d96...\n", - " At Microsoft we are seeking people who have a ...\n", + " None\n", + " None\n", + " None\n", + " https://www.indeed.com/viewjob?jk=b600392166bb...\n", + " Job Description: Software Engineer in Test 
The...\n", " \n", " \n", " 9\n", " indeed\n", - " Software Engineer\n", - " Avalon Healthcare Solutions\n", - " Remote\n", - " None\n", - " None\n", - " None\n", - " None\n", - " None\n", - " https://www.indeed.com/viewjob?jk=da35b9bb74a0...\n", - " Avalon Healthcare Solutions, headquartered in ...\n", + " Fpga Engineer\n", + " R-DEX Systems, Inc.\n", + " Atlanta\n", + " GA\n", + " fulltime\n", + " yearly\n", + " 160000\n", + " 120000\n", + " https://www.indeed.com/viewjob?jk=a7e9d356c333...\n", + " Title: Senior DSP/FPGA Firmware Engineer Descr...\n", " \n", " \n", " 10\n", @@ -236,7 +258,7 @@ " yearly\n", " None\n", " None\n", - " https://www.linkedin.com/jobs/view/3701775201\n", + " https://www.linkedin.com/jobs/view/3701770659\n", " Description:By bringing together people that u...\n", " \n", " \n", @@ -250,7 +272,7 @@ " yearly\n", " None\n", " None\n", - " https://www.linkedin.com/jobs/view/3701772329\n", + " https://www.linkedin.com/jobs/view/3701769637\n", " Description:By bringing together people that u...\n", " \n", " \n", @@ -264,12 +286,26 @@ " yearly\n", " None\n", " None\n", - " https://www.linkedin.com/jobs/view/3701769637\n", + " https://www.linkedin.com/jobs/view/3701772329\n", " Description:By bringing together people that u...\n", " \n", " \n", " 16\n", " linkedin\n", + " Software Engineer - Early Career\n", + " Lockheed Martin\n", + " Fort Worth\n", + " TX\n", + " fulltime\n", + " yearly\n", + " None\n", + " None\n", + " https://www.linkedin.com/jobs/view/3701775201\n", + " Description:By bringing together people that u...\n", + " \n", + " \n", + " 17\n", + " linkedin\n", " Software Engineer\n", " SpiderOak\n", " Austin\n", @@ -282,20 +318,6 @@ " We're only as strong as our weakest link.In th...\n", " \n", " \n", - " 17\n", - " linkedin\n", - " Software Engineer - Early Career\n", - " Lockheed Martin\n", - " Fort Worth\n", - " TX\n", - " fulltime\n", - " yearly\n", - " None\n", - " None\n", - " https://www.linkedin.com/jobs/view/3701770659\n", - 
" Description:By bringing together people that u...\n", - " \n", - " \n", " 18\n", " linkedin\n", " Full-Stack Software Engineer\n", @@ -326,20 +348,6 @@ " \n", " 20\n", " zip_recruiter\n", - " (USA) Software Engineer III - Prototype Engine...\n", - " Walmart\n", - " Dallas\n", - " TX\n", - " None\n", - " None\n", - " None\n", - " None\n", - " https://click.appcast.io/track/hcgsw4k?cs=ngp&...\n", - " We are currently seeking a highly skilled and ...\n", - " \n", - " \n", - " 21\n", - " zip_recruiter\n", " Software Engineer - New Grad\n", " ZipRecruiter\n", " Santa Monica\n", @@ -348,53 +356,11 @@ " yearly\n", " 130000\n", " 150000\n", - " https://www.ziprecruiter.com/jobs/ziprecruiter...\n", - " We offer a hybrid work environment. Most US-ba...\n", + " https://www.ziprecruiter.com/c/ZipRecruiter/Jo...\n", + " Demonstrated foundation in software engineerin...\n", " \n", " \n", - " 22\n", - " zip_recruiter\n", - " Software Developer\n", - " Robert Half\n", - " Corpus Christi\n", - " TX\n", - " fulltime\n", - " yearly\n", - " 105000\n", - " 115000\n", - " https://www.ziprecruiter.com/jobs/robert-half-...\n", - " Robert Half has an opening for a Software Deve...\n", - " \n", - " \n", - " 23\n", - " zip_recruiter\n", - " Software Engineer\n", - " Advantage Technical\n", - " Ontario\n", - " CA\n", - " fulltime\n", - " yearly\n", - " 100000\n", - " 150000\n", - " https://www.ziprecruiter.com/jobs/advantage-te...\n", - " New career opportunity available with major Ma...\n", - " \n", - " \n", - " 24\n", - " zip_recruiter\n", - " Software Developer\n", - " Robert Half\n", - " Tucson\n", - " AZ\n", - " temporary\n", - " hourly\n", - " 47\n", - " 55\n", - " https://www.ziprecruiter.com/jobs/robert-half-...\n", - " Robert Half is accepting inquiries for a SQL S...\n", - " \n", - " \n", - " 25\n", + " 21\n", " zip_recruiter\n", " Full Stack Software Engineer\n", " ZipRecruiter\n", @@ -404,25 +370,11 @@ " yearly\n", " 105000\n", " 145000\n", - " 
https://www.ziprecruiter.com/jobs/ziprecruiter...\n", - " We offer a hybrid work environment. Most US-ba...\n", + " https://www.ziprecruiter.com/c/ZipRecruiter/Jo...\n", + " Experience in client side development using Re...\n", " \n", " \n", - " 26\n", - " zip_recruiter\n", - " Software Developer IV\n", - " Kforce Inc.\n", - " Mountain View\n", - " CA\n", - " contract\n", - " hourly\n", - " 55\n", - " 75\n", - " https://www.kforce.com/Jobs/job.aspx?job=1696~...\n", - " Kforce has a client that is seeking a Software...\n", - " \n", - " \n", - " 27\n", + " 22\n", " zip_recruiter\n", " Software Developer | Onsite | Omaha, NE - Omaha\n", " OneStaff Medical\n", @@ -432,36 +384,106 @@ " yearly\n", " 60000\n", " 110000\n", - " https://www.ziprecruiter.com/jobs/onestaff-med...\n", - " Company Description: We are looking for a well...\n", + " https://www.ziprecruiter.com/c/OneStaff-Medica...\n", + " We are looking for a well-rounded Software Dev...\n", + " \n", + " \n", + " 23\n", + " zip_recruiter\n", + " Senior Software Engineer, Onsite [Real-time]\n", + " Raytheon\n", + " McKinney\n", + " TX\n", + " fulltime\n", + " yearly\n", + " 116000\n", + " 153000\n", + " https://jsv3.recruitics.com/redirect?rx_cid=34...\n", + " By joining the Silent Knight team as a Senior ...\n", + " \n", + " \n", + " 24\n", + " zip_recruiter\n", + " Senior Software Engineer - TS/SCI **Minimum $2...\n", + " Raytheon\n", + " Dallas\n", + " TX\n", + " fulltime\n", + " yearly\n", + " 122000\n", + " 162000\n", + " https://jsv3.recruitics.com/redirect?rx_cid=34...\n", + " Object Oriented Programming using C++ with Lin...\n", + " \n", + " \n", + " 25\n", + " zip_recruiter\n", + " Software Engineer III (full stack, AI/ML, Djan...\n", + " Ayahealthcare\n", + " Remote\n", + " OR\n", + " None\n", + " yearly\n", + " 156000\n", + " 165000\n", + " https://click.appcast.io/track/hcbh0qq?cs=ngp&...\n", + " The Software Engineer III will be an integral ...\n", + " \n", + " \n", + " 26\n", + " zip_recruiter\n", + 
" Software Engineer Full Stack\n", + " Generac Power Systems\n", + " Denver\n", + " CO\n", + " fulltime\n", + " yearly\n", + " 90000\n", + " 115000\n", + " https://www.ziprecruiter.com/c/Generac-Power-S...\n", + " As a Software Engineer on the Energy Technolog...\n", + " \n", + " \n", + " 27\n", + " zip_recruiter\n", + " Embedded Software Engineer (Fort Worth, TX or ...\n", + " Kubota\n", + " Fort Worth\n", + " TX\n", + " fulltime\n", + " yearly\n", + " 122000\n", + " 167000\n", + " https://us62e2.dayforcehcm.com/CandidatePortal...\n", + " Work with a cross-functional team to design, t...\n", " \n", " \n", " 28\n", " zip_recruiter\n", - " Senior Software Engineer\n", - " RightStaff, Inc.\n", - " Dallas\n", + " Senior Software Engineer (FT)\n", + " National Indoor RV Center\n", + " Lewisville\n", " TX\n", " fulltime\n", " yearly\n", - " 120000\n", - " 180000\n", - " https://www.ziprecruiter.com/jobs/rightstaff-i...\n", - " Job Description:We are seeking a talented and ...\n", + " 125000\n", + " 0\n", + " https://www.ziprecruiter.com/c/National-Indoor...\n", + " As a Senior Software Engineer, you will: * Des...\n", " \n", " \n", " 29\n", " zip_recruiter\n", - " Software Developer - .Net Core - 12886\n", - " Walker Elliott\n", + " 2024 Next Gen IT Program | Software Engineerin...\n", + " Southern Glazer's Wine & Spirits\n", " Dallas\n", " TX\n", - " fulltime\n", + " None\n", " yearly\n", - " 105000\n", - " 130000\n", - " https://www.ziprecruiter.com/jobs/walker-ellio...\n", - " Our highly successful DFW based client has bee...\n", + " 70000\n", + " 0\n", + " https://click.appcast.io/track/hdsbnae?cs=b4&j...\n", + " Finally, through the work assigned, the analys...\n", " \n", " \n", "\n", @@ -469,219 +491,185 @@ ], "text/plain": [ " site title \\\n", - "0 indeed Firmware Engineer \n", - "1 indeed Computer Engineer \n", - "2 indeed Software Engineer \n", - "3 indeed Development Operations Engineer \n", - "4 indeed Computer Engineer \n", + "0 indeed Mental Health 
Therapist \n", + "1 indeed .NET Developer \n", + "2 indeed Senior Software Engineer \n", + "3 indeed Front End Developer \n", + "4 indeed Software Engineer \n", "5 indeed Full Stack Developer \n", - "6 indeed Software Engineer \n", - "7 indeed Senior Software Engineer \n", + "6 indeed Senior Software Engineer \n", + "7 indeed Computer Engineer \n", "8 indeed Software Engineer \n", - "9 indeed Software Engineer \n", + "9 indeed Fpga Engineer \n", "10 linkedin Software Engineer \n", "11 linkedin Software Engineer - Early Career \n", "12 linkedin Software Engineer - Early Career \n", "13 linkedin Software Engineer - Early Career \n", "14 linkedin Software Engineer - Early Career \n", "15 linkedin Software Engineer - Early Career \n", - "16 linkedin Software Engineer \n", - "17 linkedin Software Engineer - Early Career \n", + "16 linkedin Software Engineer - Early Career \n", + "17 linkedin Software Engineer \n", "18 linkedin Full-Stack Software Engineer \n", "19 linkedin Software Engineer \n", - "20 zip_recruiter (USA) Software Engineer III - Prototype Engine... \n", - "21 zip_recruiter Software Engineer - New Grad \n", - "22 zip_recruiter Software Developer \n", - "23 zip_recruiter Software Engineer \n", - "24 zip_recruiter Software Developer \n", - "25 zip_recruiter Full Stack Software Engineer \n", - "26 zip_recruiter Software Developer IV \n", - "27 zip_recruiter Software Developer | Onsite | Omaha, NE - Omaha \n", - "28 zip_recruiter Senior Software Engineer \n", - "29 zip_recruiter Software Developer - .Net Core - 12886 \n", + "20 zip_recruiter Software Engineer - New Grad \n", + "21 zip_recruiter Full Stack Software Engineer \n", + "22 zip_recruiter Software Developer | Onsite | Omaha, NE - Omaha \n", + "23 zip_recruiter Senior Software Engineer, Onsite [Real-time] \n", + "24 zip_recruiter Senior Software Engineer - TS/SCI **Minimum $2... \n", + "25 zip_recruiter Software Engineer III (full stack, AI/ML, Djan... 
\n", + "26 zip_recruiter Software Engineer Full Stack \n", + "27 zip_recruiter Embedded Software Engineer (Fort Worth, TX or ... \n", + "28 zip_recruiter Senior Software Engineer (FT) \n", + "29 zip_recruiter 2024 Next Gen IT Program | Software Engineerin... \n", "\n", - " company_name city state job_type \\\n", - "0 Advanced Motion Controls Camarillo CA fulltime \n", - "1 Honeywell None fulltime \n", - "2 Splunk Remote None fulltime \n", - "3 Stratacache Dayton OH fulltime \n", - "4 Honeywell None fulltime \n", - "5 Reinventing Geospatial, Inc. (RGi) Herndon VA fulltime \n", - "6 Workiva Remote None None \n", - "7 SciTec Boulder CO fulltime \n", - "8 Microsoft None fulltime \n", - "9 Avalon Healthcare Solutions Remote None None \n", - "10 Fieldguide San Francisco CA fulltime \n", - "11 Lockheed Martin Sunnyvale CA fulltime \n", - "12 Lockheed Martin Edwards CA fulltime \n", - "13 Lockheed Martin Fort Worth TX fulltime \n", - "14 Lockheed Martin Fort Worth TX fulltime \n", - "15 Lockheed Martin Fort Worth TX fulltime \n", - "16 SpiderOak Austin TX fulltime \n", - "17 Lockheed Martin Fort Worth TX fulltime \n", - "18 Rain New York NY fulltime \n", - "19 Nike Portland OR contract \n", - "20 Walmart Dallas TX None \n", - "21 ZipRecruiter Santa Monica CA fulltime \n", - "22 Robert Half Corpus Christi TX fulltime \n", - "23 Advantage Technical Ontario CA fulltime \n", - "24 Robert Half Tucson AZ temporary \n", - "25 ZipRecruiter Phoenix AZ fulltime \n", - "26 Kforce Inc. Mountain View CA contract \n", - "27 OneStaff Medical Omaha NE fulltime \n", - "28 RightStaff, Inc. 
Dallas TX fulltime \n", - "29 Walker Elliott Dallas TX fulltime \n", + " company_name city state \\\n", + "0 Sandstone Care Broomfield CO \n", + "1 Noir Consulting Irving TX \n", + "2 Johns Hopkins Applied Physics Laboratory (APL) Laurel MD \n", + "3 Verkada San Mateo CA \n", + "4 Adobe San Jose CA \n", + "5 Comcast Philadelphia PA \n", + "6 Smart City Solutions FL \n", + "7 Honeywell None \n", + "8 Fidelity Investments Westlake TX \n", + "9 R-DEX Systems, Inc. Atlanta GA \n", + "10 Fieldguide San Francisco CA \n", + "11 Lockheed Martin Sunnyvale CA \n", + "12 Lockheed Martin Edwards CA \n", + "13 Lockheed Martin Fort Worth TX \n", + "14 Lockheed Martin Fort Worth TX \n", + "15 Lockheed Martin Fort Worth TX \n", + "16 Lockheed Martin Fort Worth TX \n", + "17 SpiderOak Austin TX \n", + "18 Rain New York NY \n", + "19 Nike Portland OR \n", + "20 ZipRecruiter Santa Monica CA \n", + "21 ZipRecruiter Phoenix AZ \n", + "22 OneStaff Medical Omaha NE \n", + "23 Raytheon McKinney TX \n", + "24 Raytheon Dallas TX \n", + "25 Ayahealthcare Remote OR \n", + "26 Generac Power Systems Denver CO \n", + "27 Kubota Fort Worth TX \n", + "28 National Indoor RV Center Lewisville TX \n", + "29 Southern Glazer's Wine & Spirits Dallas TX \n", "\n", - " interval min_amount max_amount \\\n", - "0 yearly 145000 110000 \n", - "1 None None None \n", - "2 yearly 159500 116000 \n", - "3 yearly 90000 83573 \n", - "4 None None None \n", - "5 None None None \n", - "6 yearly 134000 79000 \n", - "7 yearly 164000 93000 \n", - "8 yearly 182600 94300 \n", - "9 None None None \n", - "10 yearly None None \n", - "11 yearly None None \n", - "12 yearly None None \n", - "13 yearly None None \n", - "14 yearly None None \n", - "15 yearly None None \n", - "16 yearly None None \n", - "17 yearly None None \n", - "18 yearly None None \n", - "19 yearly None None \n", - "20 None None None \n", - "21 yearly 130000 150000 \n", - "22 yearly 105000 115000 \n", - "23 yearly 100000 150000 \n", - "24 hourly 47 55 \n", - "25 
yearly 105000 145000 \n", - "26 hourly 55 75 \n", - "27 yearly 60000 110000 \n", - "28 yearly 120000 180000 \n", - "29 yearly 105000 130000 \n", + " job_type interval min_amount max_amount \\\n", + "0 fulltime yearly 68000 57500 \n", + "1 None yearly 200000 200000 \n", + "2 None None None None \n", + "3 fulltime yearly 285000 120000 \n", + "4 fulltime yearly 142700 73200 \n", + "5 fulltime yearly 184663 78789 \n", + "6 fulltime yearly 100000 85000 \n", + "7 fulltime None None None \n", + "8 None None None None \n", + "9 fulltime yearly 160000 120000 \n", + "10 fulltime yearly None None \n", + "11 fulltime yearly None None \n", + "12 fulltime yearly None None \n", + "13 fulltime yearly None None \n", + "14 fulltime yearly None None \n", + "15 fulltime yearly None None \n", + "16 fulltime yearly None None \n", + "17 fulltime yearly None None \n", + "18 fulltime yearly None None \n", + "19 contract yearly None None \n", + "20 fulltime yearly 130000 150000 \n", + "21 fulltime yearly 105000 145000 \n", + "22 fulltime yearly 60000 110000 \n", + "23 fulltime yearly 116000 153000 \n", + "24 fulltime yearly 122000 162000 \n", + "25 None yearly 156000 165000 \n", + "26 fulltime yearly 90000 115000 \n", + "27 fulltime yearly 122000 167000 \n", + "28 fulltime yearly 125000 0 \n", + "29 None yearly 70000 0 \n", "\n", " job_url \\\n", - "0 https://www.indeed.com/viewjob?jk=a2e7077fdd3c... \n", - "1 https://www.indeed.com/viewjob?jk=5a1da623ee75... \n", - "2 https://www.indeed.com/viewjob?jk=155495ca3f46... \n", - "3 https://www.indeed.com/viewjob?jk=77cf3540c06e... \n", - "4 https://www.indeed.com/viewjob?jk=7fadbb7c936f... \n", - "5 https://www.indeed.com/viewjob?jk=11b2b5b0dd44... \n", - "6 https://www.indeed.com/viewjob?jk=ec3ab6eb9253... \n", - "7 https://www.indeed.com/viewjob?jk=781e4cf0cf6d... \n", - "8 https://www.indeed.com/viewjob?jk=21e05b9e9d96... \n", - "9 https://www.indeed.com/viewjob?jk=da35b9bb74a0... \n", + "0 https://www.indeed.com/viewjob?jk=f5f33d72e030... 
\n", + "1 https://www.indeed.com/viewjob?jk=1b22ba65296c... \n", + "2 https://www.indeed.com/viewjob?jk=309eed270a88... \n", + "3 https://www.indeed.com/viewjob?jk=a3ea45daca75... \n", + "4 https://www.indeed.com/viewjob?jk=0f2dc9901fc7... \n", + "5 https://www.indeed.com/viewjob?jk=eb5c927221eb... \n", + "6 https://www.indeed.com/viewjob?jk=ba1945f143a1... \n", + "7 https://www.indeed.com/viewjob?jk=5a1da623ee75... \n", + "8 https://www.indeed.com/viewjob?jk=b600392166bb... \n", + "9 https://www.indeed.com/viewjob?jk=a7e9d356c333... \n", "10 https://www.linkedin.com/jobs/view/3696158160 \n", "11 https://www.linkedin.com/jobs/view/3693012711 \n", "12 https://www.linkedin.com/jobs/view/3700669785 \n", - "13 https://www.linkedin.com/jobs/view/3701775201 \n", - "14 https://www.linkedin.com/jobs/view/3701772329 \n", - "15 https://www.linkedin.com/jobs/view/3701769637 \n", - "16 https://www.linkedin.com/jobs/view/3707174719 \n", - "17 https://www.linkedin.com/jobs/view/3701770659 \n", + "13 https://www.linkedin.com/jobs/view/3701770659 \n", + "14 https://www.linkedin.com/jobs/view/3701769637 \n", + "15 https://www.linkedin.com/jobs/view/3701772329 \n", + "16 https://www.linkedin.com/jobs/view/3701775201 \n", + "17 https://www.linkedin.com/jobs/view/3707174719 \n", "18 https://www.linkedin.com/jobs/view/3696158877 \n", "19 https://www.linkedin.com/jobs/view/3693340247 \n", - "20 https://click.appcast.io/track/hcgsw4k?cs=ngp&... \n", - "21 https://www.ziprecruiter.com/jobs/ziprecruiter... \n", - "22 https://www.ziprecruiter.com/jobs/robert-half-... \n", - "23 https://www.ziprecruiter.com/jobs/advantage-te... \n", - "24 https://www.ziprecruiter.com/jobs/robert-half-... \n", - "25 https://www.ziprecruiter.com/jobs/ziprecruiter... \n", - "26 https://www.kforce.com/Jobs/job.aspx?job=1696~... \n", - "27 https://www.ziprecruiter.com/jobs/onestaff-med... \n", - "28 https://www.ziprecruiter.com/jobs/rightstaff-i... \n", - "29 https://www.ziprecruiter.com/jobs/walker-ellio... 
\n", + "20 https://www.ziprecruiter.com/c/ZipRecruiter/Jo... \n", + "21 https://www.ziprecruiter.com/c/ZipRecruiter/Jo... \n", + "22 https://www.ziprecruiter.com/c/OneStaff-Medica... \n", + "23 https://jsv3.recruitics.com/redirect?rx_cid=34... \n", + "24 https://jsv3.recruitics.com/redirect?rx_cid=34... \n", + "25 https://click.appcast.io/track/hcbh0qq?cs=ngp&... \n", + "26 https://www.ziprecruiter.com/c/Generac-Power-S... \n", + "27 https://us62e2.dayforcehcm.com/CandidatePortal... \n", + "28 https://www.ziprecruiter.com/c/National-Indoor... \n", + "29 https://click.appcast.io/track/hdsbnae?cs=b4&j... \n", "\n", " description \n", - "0 We are looking for an experienced Firmware Eng... \n", - "1 Join a team recognized for leadership, innovat... \n", - "2 A little about us. Splunk is the key to enterp... \n", - "3 Stratacache, Inc. delivers in-store retail exp... \n", - "4 Join a team recognized for leadership, innovat... \n", - "5 Job Highlights As a Full Stack Software Engine... \n", - "6 Are you ready to embark on an exciting journey... \n", - "7 SciTec has been awarded multiple government co... \n", - "8 At Microsoft we are seeking people who have a ... \n", - "9 Avalon Healthcare Solutions, headquartered in ... \n", + "0 Mental Health Therapist- Broomfield, CO Locati... \n", + "1 .NET Software Engineer, C#, WPF - Irving (Tech... \n", + "2 Description Are you a communications systems d... \n", + "3 Who We Are Verkada is the largest cloud-based ... \n", + "4 Our Company Changing the world through digital... \n", + "5 Make your mark at Comcast - a Fortune 30 globa... \n", + "6 Smart City hiring a full stack software develo... \n", + "7 Join a team recognized for leadership, innovat... \n", + "8 Job Description: Software Engineer in Test The... \n", + "9 Title: Senior DSP/FPGA Firmware Engineer Descr... \n", "10 About us:Fieldguide is establishing a new stat... \n", "11 Description:By bringing together people that u... 
\n", "12 Description:By bringing together people that u... \n", "13 Description:By bringing together people that u... \n", "14 Description:By bringing together people that u... \n", "15 Description:By bringing together people that u... \n", - "16 We're only as strong as our weakest link.In th... \n", - "17 Description:By bringing together people that u... \n", + "16 Description:By bringing together people that u... \n", + "17 We're only as strong as our weakest link.In th... \n", "18 Rain’s mission is to create the fastest and ea... \n", "19 Work options: FlexibleWe consider remote, on-p... \n", - "20 We are currently seeking a highly skilled and ... \n", - "21 We offer a hybrid work environment. Most US-ba... \n", - "22 Robert Half has an opening for a Software Deve... \n", - "23 New career opportunity available with major Ma... \n", - "24 Robert Half is accepting inquiries for a SQL S... \n", - "25 We offer a hybrid work environment. Most US-ba... \n", - "26 Kforce has a client that is seeking a Software... \n", - "27 Company Description: We are looking for a well... \n", - "28 Job Description:We are seeking a talented and ... \n", - "29 Our highly successful DFW based client has bee... " + "20 Demonstrated foundation in software engineerin... \n", + "21 Experience in client side development using Re... \n", + "22 We are looking for a well-rounded Software Dev... \n", + "23 By joining the Silent Knight team as a Senior ... \n", + "24 Object Oriented Programming using C++ with Lin... \n", + "25 The Software Engineer III will be an integral ... \n", + "26 As a Software Engineer on the Energy Technolog... \n", + "27 Work with a cross-functional team to design, t... \n", + "28 As a Senior Software Engineer, you will: * Des... \n", + "29 Finally, through the work assigned, the analys... 
" ] }, + "execution_count": 5, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ - "from jobspy import scrape_jobs\n", - "import pandas as pd\n", - "\n", - "jobs: pd.DataFrame = scrape_jobs(\n", + "scrape_jobs(\n", " site_name=[\"indeed\", \"linkedin\", \"zip_recruiter\"],\n", " search_term=\"software engineer\",\n", " results_wanted=10\n", - ")\n", - "\n", - "if jobs.empty:\n", - " print(\"No jobs found.\")\n", - "else:\n", - " # 1 print\n", - " pd.set_option('display.max_columns', None)\n", - " pd.set_option('display.max_rows', None)\n", - " pd.set_option('display.width', None)\n", - " pd.set_option('display.max_colwidth', 50) # set to 0 to see full job url / desc\n", - " print(jobs)\n", - "\n", - " # 2 display in Jupyter Notebook\n", - " display(jobs)\n", - "\n", - " # 3 output to csv\n", - " jobs.to_csv('jobs.csv', index=False)" + ")" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "efd667ef-fdf0-452a-b5e5-ce6825755be7", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1574dc17-0a42-4655-964f-5c03a6d3deb0", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "my-poetry-env", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "my-poetry-env" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -693,7 +681,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/README.md b/README.md index 0ef7f16..146dd8c 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,9 @@ - Scrapes job postings from **LinkedIn**, **Indeed** & **ZipRecruiter** simultaneously - Aggregates the job postings in a Pandas DataFrame - + +![jobspy](https://github.com/cullenwatson/JobSpy/assets/78247585/ec7ef355-05f6-4fd3-8161-a817e31c5c57) + ### Installation `pip install 
python-jobspy` @@ -26,18 +28,18 @@ jobs: pd.DataFrame = scrape_jobs( if jobs.empty: print("No jobs found.") else: - # 1 print + #1 print pd.set_option('display.max_columns', None) pd.set_option('display.max_rows', None) pd.set_option('display.width', None) pd.set_option('display.max_colwidth', 50) # set to 0 to see full job url / desc print(jobs) - # 2 display in Jupyter Notebook - # display(jobs) + #2 display in Jupyter Notebook + #display(jobs) - # 3 output to csv - # jobs.to_csv('jobs.csv', index=False) + #3 output to .csv + #jobs.to_csv('jobs.csv', index=False) ``` ### Output @@ -51,8 +53,6 @@ zip_recruiter Software Engineer - New Grad ZipRecruiter Santa Monica zip_recruiter Software Developer TEKsystems Phoenix AZ fulltime hourly 65 75 https://www.ziprecruiter.com/jobs/teksystems-0... Top Skills' Details• 6 years of Java developme... ``` ### Parameters for `scrape_jobs()` - - ```plaintext Required ├── site_type (List[enum]): linkedin, zip_recruiter, indeed @@ -87,12 +87,23 @@ JobPost ``` -### FAQ - -#### Encountering issues with your queries? - -Try reducing the number of `results_wanted` and/or broadening the filters. If problems persist, please submit an issue. - -#### Received a response code 429? -You have been blocked by the job board site for sending too many requests. ZipRecruiter seems to be the most aggressive at the moment. Consider waiting a few seconds, or try using a VPN. Proxy support coming soon. +## Frequently Asked Questions + +--- + +**Q: Encountering issues with your queries?** +**A:** Try reducing the number of `results_wanted` and/or broadening the filters. If problems persist, [submit an issue](https://github.com/cullenwatson/JobSpy/issues). + +--- + +**Q: Received a response code 429?** +**A:** This indicates that you have been blocked by the job board site for sending too many requests. Currently, **ZipRecruiter** is particularly aggressive with blocking. We recommend: + +- Waiting a few seconds between requests. +- Trying a VPN to change your IP address. 
+ +**Note:** Proxy support is in development and coming soon! + +--- + diff --git a/pyproject.toml b/pyproject.toml index 1d89504..112f343 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "python-jobspy" -version = "1.0.1" +version = "1.0.2" description = "Job scraper for LinkedIn, Indeed & ZipRecruiter" authors = ["Zachary Hampton ", "Cullen Watson "] readme = "README.md" diff --git a/src/jobspy/core/__init__.py b/src/__init__.py similarity index 100% rename from src/jobspy/core/__init__.py rename to src/__init__.py diff --git a/src/jobspy/__init__.py b/src/jobspy/__init__.py index f3bb15f..9355d43 100644 --- a/src/jobspy/__init__.py +++ b/src/jobspy/__init__.py @@ -24,15 +24,14 @@ def _map_str_to_site(site_name: str) -> Site: def scrape_jobs( - site_name: str | Site | List[Site], - search_term: str, - - location: str = "", - distance: int = None, - is_remote: bool = False, - job_type: JobType = None, - easy_apply: bool = False, # linkedin - results_wanted: int = 15 + site_name: str | Site | List[Site], + search_term: str, + location: str = "", + distance: int = None, + is_remote: bool = False, + job_type: JobType = None, + easy_apply: bool = False, # linkedin + results_wanted: int = 15, ) -> pd.DataFrame: """ Asynchronously scrapes job data from multiple job sites. 
@@ -71,48 +70,59 @@ def scrape_jobs( for site, job_response in results.items(): for job in job_response.jobs: data = job.dict() - data['site'] = site + data["site"] = site # Formatting JobType - data['job_type'] = data['job_type'].value if data['job_type'] else None + data["job_type"] = data["job_type"].value if data["job_type"] else None # Formatting Location - location_obj = data.get('location') + location_obj = data.get("location") if location_obj and isinstance(location_obj, dict): - data['city'] = location_obj.get('city', '') - data['state'] = location_obj.get('state', '') - data['country'] = location_obj.get('country', 'USA') + data["city"] = location_obj.get("city", "") + data["state"] = location_obj.get("state", "") + data["country"] = location_obj.get("country", "USA") else: - data['city'] = None - data['state'] = None - data['country'] = None + data["city"] = None + data["state"] = None + data["country"] = None # Formatting Compensation - compensation_obj = data.get('compensation') + compensation_obj = data.get("compensation") if compensation_obj and isinstance(compensation_obj, dict): - data['interval'] = compensation_obj.get('interval').value if compensation_obj.get('interval') else None - data['min_amount'] = compensation_obj.get('min_amount') - data['max_amount'] = compensation_obj.get('max_amount') - data['currency'] = compensation_obj.get('currency', 'USD') + data["interval"] = ( + compensation_obj.get("interval").value + if compensation_obj.get("interval") + else None + ) + data["min_amount"] = compensation_obj.get("min_amount") + data["max_amount"] = compensation_obj.get("max_amount") + data["currency"] = compensation_obj.get("currency", "USD") else: - data['interval'] = None - data['min_amount'] = None - data['max_amount'] = None - data['currency'] = None + data["interval"] = None + data["min_amount"] = None + data["max_amount"] = None + data["currency"] = None job_df = pd.DataFrame([data]) dfs.append(job_df) if dfs: df = pd.concat(dfs, 
ignore_index=True) - desired_order = ['site', 'title', 'company_name', 'city', 'state','job_type', - 'interval', 'min_amount', 'max_amount', 'job_url', 'description',] + desired_order = [ + "site", + "title", + "company_name", + "city", + "state", + "job_type", + "interval", + "min_amount", + "max_amount", + "job_url", + "description", + ] df = df[desired_order] else: df = pd.DataFrame() return df - - - - diff --git a/src/jobspy/jobs/__init__.py b/src/jobspy/jobs/__init__.py index 771e847..4c1bc65 100644 --- a/src/jobspy/jobs/__init__.py +++ b/src/jobspy/jobs/__init__.py @@ -19,7 +19,6 @@ class JobType(Enum): VOLUNTEER = "volunteer" - class Location(BaseModel): country: str = "USA" city: str = None @@ -47,10 +46,10 @@ class JobPost(BaseModel): job_url: str location: Optional[Location] - description: str = None + description: Optional[str] = None job_type: Optional[JobType] = None compensation: Optional[Compensation] = None - date_posted: date = None + date_posted: Optional[date] = None class JobResponse(BaseModel): diff --git a/src/jobspy/scrapers/__init__.py b/src/jobspy/scrapers/__init__.py index 4df004b..b2a8900 100644 --- a/src/jobspy/scrapers/__init__.py +++ b/src/jobspy/scrapers/__init__.py @@ -1,5 +1,5 @@ from ..jobs import Enum, BaseModel, JobType, JobResponse -from typing import List, Dict, Optional, Any +from typing import List, Optional, Any class StatusException(Exception): diff --git a/src/jobspy/scrapers/indeed/__init__.py b/src/jobspy/scrapers/indeed/__init__.py index 846f4f7..4e20977 100644 --- a/src/jobspy/scrapers/indeed/__init__.py +++ b/src/jobspy/scrapers/indeed/__init__.py @@ -1,9 +1,8 @@ import re -import sys import math import json from datetime import datetime -from typing import Optional, Tuple, List +from typing import Optional import tls_client import urllib.parse @@ -11,7 +10,14 @@ from bs4 import BeautifulSoup from bs4.element import Tag from concurrent.futures import ThreadPoolExecutor, Future -from ...jobs import JobPost, 
Compensation, CompensationInterval, Location, JobResponse, JobType +from ...jobs import ( + JobPost, + Compensation, + CompensationInterval, + Location, + JobResponse, + JobType, +) from .. import Scraper, ScraperInput, Site, StatusException @@ -61,10 +67,7 @@ class IndeedScraper(Scraper): params["sc"] = "0kf:" + "".join(sc_values) + ";" response = session.get(self.url + "/jobs", params=params) - if ( - response.status_code != 200 - and response.status_code != 307 - ): + if response.status_code != 200 and response.status_code != 307: raise StatusException(response.status_code) soup = BeautifulSoup(response.content, "html.parser") @@ -136,8 +139,10 @@ class IndeedScraper(Scraper): return job_post with ThreadPoolExecutor(max_workers=10) as executor: - job_results: list[Future] = [executor.submit(process_job, job) for job in - jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]] + job_results: list[Future] = [ + executor.submit(process_job, job) + for job in jobs["metaData"]["mosaicProviderJobCardsModel"]["results"] + ] job_list = [result.result() for result in job_results if result.result()] diff --git a/src/jobspy/scrapers/linkedin/__init__.py b/src/jobspy/scrapers/linkedin/__init__.py index b3718ae..e4e8bc9 100644 --- a/src/jobspy/scrapers/linkedin/__init__.py +++ b/src/jobspy/scrapers/linkedin/__init__.py @@ -6,7 +6,14 @@ from bs4 import BeautifulSoup from bs4.element import Tag from .. 
import Scraper, ScraperInput, Site -from ...jobs import JobPost, Location, JobResponse, JobType, Compensation, CompensationInterval +from ...jobs import ( + JobPost, + Location, + JobResponse, + JobType, + Compensation, + CompensationInterval, +) class LinkedInScraper(Scraper): @@ -117,7 +124,9 @@ class LinkedInScraper(Scraper): date_posted=date_posted, job_url=job_url, job_type=job_type, - compensation=Compensation(interval=CompensationInterval.YEARLY, currency="USD") + compensation=Compensation( + interval=CompensationInterval.YEARLY, currency="USD" + ), ) job_list.append(job_post) if ( diff --git a/src/jobspy/scrapers/ziprecruiter/__init__.py b/src/jobspy/scrapers/ziprecruiter/__init__.py index ad11d6f..eb35686 100644 --- a/src/jobspy/scrapers/ziprecruiter/__init__.py +++ b/src/jobspy/scrapers/ziprecruiter/__init__.py @@ -2,7 +2,7 @@ import math import json import re from datetime import datetime -from typing import Optional, Tuple, List +from typing import Optional, Tuple from urllib.parse import urlparse, parse_qs import tls_client @@ -11,7 +11,14 @@ from bs4.element import Tag from concurrent.futures import ThreadPoolExecutor, Future from .. 
import Scraper, ScraperInput, Site, StatusException -from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType +from ...jobs import ( + JobPost, + Compensation, + CompensationInterval, + Location, + JobResponse, + JobType, +) class ZipRecruiterScraper(Scraper): @@ -55,7 +62,7 @@ class ZipRecruiterScraper(Scraper): "search": scraper_input.search_term, "location": scraper_input.location, "page": page, - "form": "jobs-landing" + "form": "jobs-landing", } if scraper_input.is_remote: @@ -65,7 +72,9 @@ class ZipRecruiterScraper(Scraper): params["radius"] = scraper_input.distance if job_type_value: - params["refine_by_employment"] = f"employment_type:employment_type:{job_type_value}" + params[ + "refine_by_employment" + ] = f"employment_type:employment_type:{job_type_value}" response = self.session.get( self.url + "/jobs-search", @@ -90,11 +99,14 @@ class ZipRecruiterScraper(Scraper): with ThreadPoolExecutor(max_workers=10) as executor: if "jobList" in data and data["jobList"]: jobs_js = data["jobList"] - job_results = [executor.submit(self.process_job_js, job) for job in jobs_js] + job_results = [ + executor.submit(self.process_job_js, job) for job in jobs_js + ] else: jobs_html = soup.find_all("div", {"class": "job_content"}) - job_results = [executor.submit(self.process_job_html, job) for job in - jobs_html] + job_results = [ + executor.submit(self.process_job_html, job) for job in jobs_html + ] job_list = [result.result() for result in job_results if result.result()] @@ -107,8 +119,9 @@ class ZipRecruiterScraper(Scraper): :return: job_response """ - - pages_to_process = max(3, math.ceil(scraper_input.results_wanted / self.jobs_per_page)) + pages_to_process = max( + 3, math.ceil(scraper_input.results_wanted / self.jobs_per_page) + ) try: #: get first page to initialize session @@ -125,7 +138,6 @@ class ZipRecruiterScraper(Scraper): job_list += jobs - except StatusException as e: return JobResponse( success=False, @@ -162,9 +174,7 
@@ class ZipRecruiterScraper(Scraper): title = job.find("h2", {"class": "title"}).text company = job.find("a", {"class": "company_name"}).text.strip() - description, updated_job_url = self.get_description( - job_url - ) + description, updated_job_url = self.get_description(job_url) if updated_job_url is not None: job_url = updated_job_url if description is None: @@ -173,10 +183,7 @@ class ZipRecruiterScraper(Scraper): job_type_element = job.find("li", {"class": "perk_item perk_type"}) if job_type_element: job_type_text = ( - job_type_element.text.strip() - .lower() - .replace("-", "") - .replace(" ", "") + job_type_element.text.strip().lower().replace("-", "").replace(" ", "") ) if job_type_text == "contractor": job_type_text = "contract" @@ -201,12 +208,16 @@ class ZipRecruiterScraper(Scraper): def process_job_js(self, job: dict) -> JobPost: # Map the job data to the expected fields by the Pydantic model title = job.get("Title") - description = BeautifulSoup(job.get("Snippet","").strip(), "html.parser").get_text() + description = BeautifulSoup( + job.get("Snippet", "").strip(), "html.parser" + ).get_text() company = job.get("OrgName") location = Location(city=job.get("City"), state=job.get("State")) try: - job_type = ZipRecruiterScraper.job_type_from_string(job.get("EmploymentType", "").replace("-", "_").lower()) + job_type = ZipRecruiterScraper.job_type_from_string( + job.get("EmploymentType", "").replace("-", "_").lower() + ) except ValueError: # print(f"Skipping job due to unrecognized job type: {job.get('EmploymentType')}") return None @@ -215,14 +226,14 @@ class ZipRecruiterScraper(Scraper): salary_parts = formatted_salary.split(" ") min_salary_str = salary_parts[0][1:].replace(",", "") - if '.' in min_salary_str: + if "." 
in min_salary_str: min_amount = int(float(min_salary_str) * 1000) else: min_amount = int(min_salary_str.replace("K", "000")) if len(salary_parts) >= 3 and salary_parts[2].startswith("$"): max_salary_str = salary_parts[2][1:].replace(",", "") - if '.' in max_salary_str: + if "." in max_salary_str: max_amount = int(float(max_salary_str) * 1000) else: max_amount = int(max_salary_str.replace("K", "000")) @@ -232,10 +243,12 @@ class ZipRecruiterScraper(Scraper): compensation = Compensation( interval=CompensationInterval.YEARLY, min_amount=min_amount, - max_amount=max_amount + max_amount=max_amount, ) save_job_url = job.get("SaveJobURL", "") - posted_time_match = re.search(r"posted_time=(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z)", save_job_url) + posted_time_match = re.search( + r"posted_time=(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z)", save_job_url + ) if posted_time_match: date_time_str = posted_time_match.group(1) date_posted_obj = datetime.strptime(date_time_str, "%Y-%m-%dT%H:%M:%SZ") @@ -269,10 +282,7 @@ class ZipRecruiterScraper(Scraper): return item raise ValueError(f"Invalid value for JobType: {value}") - def get_description( - self, - job_page_url: str - ) -> Tuple[Optional[str], Optional[str]]: + def get_description(self, job_page_url: str) -> Tuple[Optional[str], Optional[str]]: """ Retrieves job description by going to the job page url :param job_page_url: diff --git a/src/tests/__init__.py b/src/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/tests/test_indeed.py b/src/tests/test_indeed.py index 1d4ad52..bd9dfd1 100644 --- a/src/tests/test_indeed.py +++ b/src/tests/test_indeed.py @@ -1,4 +1,4 @@ -from jobspy import scrape_jobs +from ..jobspy import scrape_jobs def test_indeed():