From 1c264b8c580c1c96010fd955a184b4557c698b6a Mon Sep 17 00:00:00 2001 From: Cullen Watson Date: Tue, 5 Sep 2023 12:17:22 -0500 Subject: [PATCH] Indeed country support (#38) --- JobSpy_Demo.ipynb | 1469 +++++++++++++----- README.md | 91 +- pyproject.toml | 2 +- src/jobspy/__init__.py | 39 +- src/jobspy/jobs/__init__.py | 162 +- src/jobspy/scrapers/__init__.py | 6 +- src/jobspy/scrapers/indeed/__init__.py | 75 +- src/jobspy/scrapers/linkedin/__init__.py | 21 +- src/jobspy/scrapers/ziprecruiter/__init__.py | 70 +- 9 files changed, 1403 insertions(+), 532 deletions(-) diff --git a/JobSpy_Demo.ipynb b/JobSpy_Demo.ipynb index 98b74ad..3d9355f 100644 --- a/JobSpy_Demo.ipynb +++ b/JobSpy_Demo.ipynb @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "id": "1253c1f8-9437-492e-9dd3-e7fe51099420", "metadata": {}, "outputs": [ @@ -53,13 +53,13 @@ " \n", " site\n", " title\n", - " company_name\n", - " city\n", - " state\n", + " company\n", + " location\n", " job_type\n", " interval\n", " min_amount\n", " max_amount\n", + " currency\n", " job_url\n", " description\n", " \n", @@ -67,423 +67,423 @@ " \n", " \n", " 0\n", - " indeed\n", - " Mental Health Therapist\n", - " Sandstone Care\n", - " Broomfield\n", - " CO\n", + " linkedin\n", + " Manufacturing Engineer / Grand Prairie, TX\n", + " Lockheed Martin\n", + " Grand Prairie, TX\n", " fulltime\n", " yearly\n", - " 68000\n", - " 57500\n", - " https://www.indeed.com/viewjob?jk=f5f33d72e030...\n", - " Mental Health Therapist- Broomfield, CO Locati...\n", + " None\n", + " None\n", + " None\n", + " https://www.linkedin.com/jobs/view/3681196520\n", + " Duties IncludeThis position requires a self-mo...\n", " \n", " \n", " 1\n", - " indeed\n", - " .NET Developer\n", - " Noir Consulting\n", - " Irving\n", - " TX\n", - " None\n", + " linkedin\n", + " Manufacturing Engineer / Grand Prairie, TX\n", + " Lockheed Martin\n", + " Grand Prairie, TX\n", + " fulltime\n", " yearly\n", - " 200000\n", - " 
200000\n", - " https://www.indeed.com/viewjob?jk=1b22ba65296c...\n", - " .NET Software Engineer, C#, WPF - Irving (Tech...\n", + " None\n", + " None\n", + " None\n", + " https://www.linkedin.com/jobs/view/3641099818\n", + " Duties IncludeThis position requires a self-mo...\n", " \n", " \n", " 2\n", - " indeed\n", - " Senior Software Engineer\n", - " Johns Hopkins Applied Physics Laboratory (APL)\n", - " Laurel\n", - " MD\n", + " linkedin\n", + " Procurement Engineer\n", + " Lockheed Martin\n", + " Grand Prairie, TX\n", + " fulltime\n", + " yearly\n", " None\n", " None\n", " None\n", - " None\n", - " https://www.indeed.com/viewjob?jk=309eed270a88...\n", - " Description Are you a communications systems d...\n", + " https://www.linkedin.com/jobs/view/3707147989\n", + " Seeking a Procurement Engineer to join the Pre...\n", " \n", " \n", " 3\n", - " indeed\n", - " Front End Developer\n", - " Verkada\n", - " San Mateo\n", - " CA\n", + " linkedin\n", + " Manufacturing Engineer / Grand Prairie, TX\n", + " Lockheed Martin\n", + " Grand Prairie, TX\n", " fulltime\n", " yearly\n", - " 285000\n", - " 120000\n", - " https://www.indeed.com/viewjob?jk=a3ea45daca75...\n", - " Who We Are Verkada is the largest cloud-based ...\n", + " None\n", + " None\n", + " None\n", + " https://www.linkedin.com/jobs/view/3704142926\n", + " Lockheed Martin is seeking a Manufacturing Eng...\n", " \n", " \n", " 4\n", - " indeed\n", - " Software Engineer\n", - " Adobe\n", - " San Jose\n", - " CA\n", + " linkedin\n", + " Materials & Process Engineer\n", + " Lockheed Martin\n", + " Grand Prairie, TX\n", " fulltime\n", " yearly\n", - " 142700\n", - " 73200\n", - " https://www.indeed.com/viewjob?jk=0f2dc9901fc7...\n", - " Our Company Changing the world through digital...\n", + " None\n", + " None\n", + " None\n", + " https://www.linkedin.com/jobs/view/3706406945\n", + " Take your career to a new level and work at an...\n", " \n", " \n", " 5\n", - " indeed\n", - " Full Stack Developer\n", - " Comcast\n", 
- " Philadelphia\n", - " PA\n", + " linkedin\n", + " Manufacturing Engineering SME/Orlando, FL or D...\n", + " Lockheed Martin\n", + " Grand Prairie, TX\n", " fulltime\n", " yearly\n", - " 184663\n", - " 78789\n", - " https://www.indeed.com/viewjob?jk=eb5c927221eb...\n", - " Make your mark at Comcast - a Fortune 30 globa...\n", + " None\n", + " None\n", + " None\n", + " https://www.linkedin.com/jobs/view/3674491182\n", + " **WORK LOCATIONS: ORLANDO, FL OR DALLAS, TX**L...\n", " \n", " \n", " 6\n", - " indeed\n", - " Senior Software Engineer\n", - " Smart City Solutions\n", - " \n", - " FL\n", + " linkedin\n", + " Manufacturing Engineer Staff / Grand Prairie, TX\n", + " Lockheed Martin\n", + " Grand Prairie, TX\n", " fulltime\n", " yearly\n", - " 100000\n", - " 85000\n", - " https://www.indeed.com/viewjob?jk=ba1945f143a1...\n", - " Smart City hiring a full stack software develo...\n", + " None\n", + " None\n", + " None\n", + " https://www.linkedin.com/jobs/view/3700345185\n", + " Lockheed Martin Missiles and Fire Control is s...\n", " \n", " \n", " 7\n", - " indeed\n", - " Computer Engineer\n", - " Honeywell\n", - " \n", - " None\n", + " linkedin\n", + " Manufacturing Engineer Staff / Grand Prairie, TX\n", + " Lockheed Martin\n", + " Grand Prairie, TX\n", " fulltime\n", + " yearly\n", " None\n", " None\n", " None\n", - " https://www.indeed.com/viewjob?jk=5a1da623ee75...\n", - " Join a team recognized for leadership, innovat...\n", + " https://www.linkedin.com/jobs/view/3693554508\n", + " Description:Lockheed Martin Missiles and Fire ...\n", " \n", " \n", " 8\n", - " indeed\n", - " Software Engineer\n", - " Fidelity Investments\n", - " Westlake\n", - " TX\n", + " linkedin\n", + " Life Scientist/Environmental Engineer/Physical...\n", + " US Environmental Protection Agency (EPA)\n", + " Dallas, TX\n", + " fulltime\n", + " yearly\n", " None\n", " None\n", " None\n", - " None\n", - " https://www.indeed.com/viewjob?jk=b600392166bb...\n", - " Job Description: Software 
Engineer in Test The...\n", + " https://www.linkedin.com/jobs/view/3691767133\n", + " Help Help Requirements Conditions of Employmen...\n", " \n", " \n", " 9\n", - " indeed\n", - " Fpga Engineer\n", - " R-DEX Systems, Inc.\n", - " Atlanta\n", - " GA\n", - " fulltime\n", + " linkedin\n", + " Mechanical Design Engineer - Early Career - Da...\n", + " Lockheed Martin\n", + " Grand Prairie, TX\n", + " parttime\n", " yearly\n", - " 160000\n", - " 120000\n", - " https://www.indeed.com/viewjob?jk=a7e9d356c333...\n", - " Title: Senior DSP/FPGA Firmware Engineer Descr...\n", + " None\n", + " None\n", + " None\n", + " https://www.linkedin.com/jobs/view/3665731446\n", + " Description:Lockheed Martin MFC in is seeking ...\n", " \n", " \n", " 10\n", - " linkedin\n", - " Software Engineer\n", - " Fieldguide\n", - " San Francisco\n", - " CA\n", + " indeed\n", + " Senior Geotechnical Engineer\n", + " TSIT Engineering\n", + " Dallas-Fort Worth, TX, USA\n", " fulltime\n", " yearly\n", - " None\n", - " None\n", - " https://www.linkedin.com/jobs/view/3696158160\n", - " About us:Fieldguide is establishing a new stat...\n", + " 160000\n", + " 150000\n", + " USD\n", + " https://www.indeed.com/viewjob?jk=7e9ac4cadd48...\n", + " Sr. 
Geotechnical Engineer, PE (Generous Sign-o...\n", " \n", " \n", " 11\n", - " linkedin\n", - " Software Engineer - Early Career\n", - " Lockheed Martin\n", - " Sunnyvale\n", - " CA\n", + " indeed\n", + " Field Engineer\n", + " Pipeline Conditioning\n", + " TX, USA\n", " fulltime\n", " yearly\n", - " None\n", - " None\n", - " https://www.linkedin.com/jobs/view/3693012711\n", - " Description:By bringing together people that u...\n", + " 112000\n", + " 90000\n", + " USD\n", + " https://www.indeed.com/viewjob?jk=634d6fc182df...\n", + " Have you been looking for a job that promotes ...\n", " \n", " \n", " 12\n", - " linkedin\n", - " Software Engineer - Early Career\n", - " Lockheed Martin\n", - " Edwards\n", - " CA\n", + " indeed\n", + " Project Manager\n", + " Patriot Erectors, Inc.\n", + " Dallas-Fort Worth, TX, USA\n", " fulltime\n", " yearly\n", - " None\n", - " None\n", - " https://www.linkedin.com/jobs/view/3700669785\n", - " Description:By bringing together people that u...\n", + " -1\n", + " 80000\n", + " USD\n", + " https://www.indeed.com/viewjob?jk=6e40c70c7854...\n", + " *Patriot Erectors, Inc; an AISC certified stru...\n", " \n", " \n", " 13\n", - " linkedin\n", - " Software Engineer - Early Career\n", - " Lockheed Martin\n", - " Fort Worth\n", - " TX\n", + " indeed\n", + " Senior Reliability Engineer\n", + " Crescent Systems, Inc.\n", + " Richardson, TX, USA\n", " fulltime\n", " yearly\n", - " None\n", - " None\n", - " https://www.linkedin.com/jobs/view/3701770659\n", - " Description:By bringing together people that u...\n", + " -1\n", + " 150000\n", + " USD\n", + " https://www.indeed.com/viewjob?jk=cfda7cdd5f9b...\n", + " ABOUT CSI: For the past 27 years, Crescent Sys...\n", " \n", " \n", " 14\n", - " linkedin\n", - " Software Engineer - Early Career\n", - " Lockheed Martin\n", - " Fort Worth\n", - " TX\n", + " indeed\n", + " Geotechnical Engineer\n", + " Midwest Engineering and Testing Corporation\n", + " Dallas-Fort Worth, TX, USA\n", " fulltime\n", - " 
yearly\n", " None\n", " None\n", - " https://www.linkedin.com/jobs/view/3701769637\n", - " Description:By bringing together people that u...\n", + " None\n", + " None\n", + " https://www.indeed.com/viewjob?jk=3913ad000636...\n", + " This is a full time in person job located in O...\n", " \n", " \n", " 15\n", - " linkedin\n", - " Software Engineer - Early Career\n", - " Lockheed Martin\n", - " Fort Worth\n", - " TX\n", - " fulltime\n", - " yearly\n", + " indeed\n", + " Campus Recruiter\n", + " Turner Construction Company\n", + " Dallas, TX, USA\n", " None\n", " None\n", - " https://www.linkedin.com/jobs/view/3701772329\n", - " Description:By bringing together people that u...\n", + " None\n", + " None\n", + " None\n", + " https://www.indeed.com/viewjob?jk=b3e88b422df5...\n", + " Assigned to a field operations team to assist ...\n", " \n", " \n", " 16\n", - " linkedin\n", - " Software Engineer - Early Career\n", - " Lockheed Martin\n", - " Fort Worth\n", - " TX\n", + " indeed\n", + " Field Engineer\n", + " Flatiron Construction Corp\n", + " Dallas, TX, USA\n", " fulltime\n", " yearly\n", - " None\n", - " None\n", - " https://www.linkedin.com/jobs/view/3701775201\n", - " Description:By bringing together people that u...\n", + " -1\n", + " 70000\n", + " USD\n", + " https://www.indeed.com/viewjob?jk=27a8a65d19d7...\n", + " Overview: Come join our Dallas-Fort Worth team...\n", " \n", " \n", " 17\n", - " linkedin\n", - " Software Engineer\n", - " SpiderOak\n", - " Austin\n", - " TX\n", + " indeed\n", + " Audio Visual Engineer\n", + " Integrated Media Technologies, Inc.\n", + " Dallas, TX, USA\n", " fulltime\n", " yearly\n", - " None\n", - " None\n", - " https://www.linkedin.com/jobs/view/3707174719\n", - " We're only as strong as our weakest link.In th...\n", + " 90000\n", + " 70000\n", + " USD\n", + " https://www.indeed.com/viewjob?jk=004fa9a63978...\n", + " Position Overview The preferred candidate will...\n", " \n", " \n", " 18\n", - " linkedin\n", - " Full-Stack 
Software Engineer\n", - " Rain\n", - " New York\n", - " NY\n", + " indeed\n", + " Junior System Engineer\n", + " Crescent Systems, Inc.\n", + " Richardson, TX, USA\n", " fulltime\n", " yearly\n", - " None\n", - " None\n", - " https://www.linkedin.com/jobs/view/3696158877\n", - " Rain’s mission is to create the fastest and ea...\n", + " -1\n", + " 80000\n", + " USD\n", + " https://www.indeed.com/viewjob?jk=ee00fc9dec32...\n", + " Company Overview: Crescent Systems, Inc. (CSI)...\n", " \n", " \n", " 19\n", - " linkedin\n", - " Software Engineer\n", - " Nike\n", - " Portland\n", - " OR\n", - " contract\n", - " yearly\n", + " indeed\n", + " Process Engineer\n", + " Andersen Windows & Doors\n", + " Garland, TX, USA\n", + " fulltime\n", " None\n", " None\n", - " https://www.linkedin.com/jobs/view/3693340247\n", - " Work options: FlexibleWe consider remote, on-p...\n", + " None\n", + " None\n", + " https://www.indeed.com/viewjob?jk=e25c4602dacb...\n", + " ANDERSEN CORPORATION SUMMARY: We’re focused on...\n", " \n", " \n", " 20\n", " zip_recruiter\n", - " Software Engineer - New Grad\n", - " ZipRecruiter\n", - " Santa Monica\n", - " CA\n", - " fulltime\n", + " Consultant Engineer II- Boiler and Machinery\n", + " FM Global\n", + " Addison, TX\n", + " None\n", " yearly\n", - " 130000\n", - " 150000\n", - " https://www.ziprecruiter.com/c/ZipRecruiter/Jo...\n", - " Demonstrated foundation in software engineerin...\n", + " 76000\n", + " 129000\n", + " USD/CAD\n", + " https://dsp.prng.co/Y98LJ5b?clickid=[click_id]\n", + " FM Global is currently seeking a boiler & mach...\n", " \n", " \n", " 21\n", " zip_recruiter\n", - " Full Stack Software Engineer\n", - " ZipRecruiter\n", - " Phoenix\n", - " AZ\n", - " fulltime\n", + " Experienced AV Project Engineer - Dallas, TX\n", + " Ford Audio-Video Systems\n", + " Dallas, TX\n", + " None\n", " yearly\n", - " 105000\n", - " 145000\n", - " https://www.ziprecruiter.com/c/ZipRecruiter/Jo...\n", - " Experience in client side development using 
Re...\n", + " 80000\n", + " 120000\n", + " USD/CAD\n", + " https://www.ziprecruiter.com/c/Ford-Audio-Vide...\n", + " The Experienced Project Engineer is an integra...\n", " \n", " \n", " 22\n", " zip_recruiter\n", - " Software Developer | Onsite | Omaha, NE - Omaha\n", - " OneStaff Medical\n", - " Omaha\n", - " NE\n", - " fulltime\n", + " Electrical Engineer, EIT - MEP\n", + " Advantage Technical\n", + " Plano, TX\n", + " None\n", " yearly\n", - " 60000\n", - " 110000\n", - " https://www.ziprecruiter.com/c/OneStaff-Medica...\n", - " We are looking for a well-rounded Software Dev...\n", + " 85000\n", + " 120000\n", + " USD/CAD\n", + " https://www.ziprecruiter.com/c/Advantage-Techn...\n", + " Electrical Engineer, EIT - MEP Position Locati...\n", " \n", " \n", " 23\n", " zip_recruiter\n", - " Senior Software Engineer, Onsite [Real-time]\n", - " Raytheon\n", - " McKinney\n", - " TX\n", - " fulltime\n", + " Sr Mechanical Engineer\n", + " Staffmark\n", + " Allen, TX\n", + " None\n", " yearly\n", - " 116000\n", - " 153000\n", - " https://jsv3.recruitics.com/redirect?rx_cid=34...\n", - " By joining the Silent Knight team as a Senior ...\n", + " 120000\n", + " 140000\n", + " USD/CAD\n", + " https://www.ziprecruiter.com/c/Staffmark/Job/S...\n", + " We are seeking a versatile and agile Sr. 
Desig...\n", " \n", " \n", " 24\n", " zip_recruiter\n", - " Senior Software Engineer - TS/SCI **Minimum $2...\n", - " Raytheon\n", - " Dallas\n", - " TX\n", - " fulltime\n", + " Electrical Engineer, PE - MEP\n", + " Advantage Technical\n", + " Plano, TX\n", + " None\n", " yearly\n", - " 122000\n", - " 162000\n", - " https://jsv3.recruitics.com/redirect?rx_cid=34...\n", - " Object Oriented Programming using C++ with Lin...\n", + " 115000\n", + " 150000\n", + " USD/CAD\n", + " https://www.ziprecruiter.com/c/Advantage-Techn...\n", + " Electrical Engineer, PE - MEP Position Type: D...\n", " \n", " \n", " 25\n", " zip_recruiter\n", - " Software Engineer III (full stack, AI/ML, Djan...\n", - " Ayahealthcare\n", - " Remote\n", - " OR\n", + " Distributed Tracing Engineer (Hybrid)\n", + " Citi\n", + " Irving, TX\n", " None\n", " yearly\n", - " 156000\n", - " 165000\n", - " https://click.appcast.io/track/hcbh0qq?cs=ngp&...\n", - " The Software Engineer III will be an integral ...\n", + " 122000\n", + " 182000\n", + " USD/CAD\n", + " https://dsp.prng.co/_59bQ8b?clickid=[click_id]\n", + " ... 
Engineer to perform software engineering a...\n", " \n", " \n", " 26\n", " zip_recruiter\n", - " Software Engineer Full Stack\n", - " Generac Power Systems\n", - " Denver\n", - " CO\n", - " fulltime\n", + " Mechanical Engineer - Aerospace Power Systems/...\n", + " Systematic Business Consulting\n", + " Dallas, TX\n", + " None\n", " yearly\n", - " 90000\n", - " 115000\n", - " https://www.ziprecruiter.com/c/Generac-Power-S...\n", - " As a Software Engineer on the Energy Technolog...\n", + " 70000\n", + " 85000\n", + " USD/CAD\n", + " https://www.ziprecruiter.com/c/Systematic-Busi...\n", + " Position Summary The Mechanical Engineer as pa...\n", " \n", " \n", " 27\n", " zip_recruiter\n", - " Embedded Software Engineer (Fort Worth, TX or ...\n", - " Kubota\n", - " Fort Worth\n", - " TX\n", - " fulltime\n", + " Optical Engineer (Road Warrior)\n", + " Glotel Inc\n", + " Dallas, TX\n", + " contract\n", " yearly\n", - " 122000\n", - " 167000\n", - " https://us62e2.dayforcehcm.com/CandidatePortal...\n", - " Work with a cross-functional team to design, t...\n", + " 30\n", + " 36\n", + " USD/CAD\n", + " https://www.ziprecruiter.com/c/Glotel-Inc/Job/...\n", + " Exciting contract/contract to hire role for a ...\n", " \n", " \n", " 28\n", " zip_recruiter\n", - " Senior Software Engineer (FT)\n", - " National Indoor RV Center\n", - " Lewisville\n", - " TX\n", - " fulltime\n", + " Mechanical Engineer\n", + " Core Personnel\n", + " Dallas, TX\n", + " None\n", " yearly\n", - " 125000\n", - " 0\n", - " https://www.ziprecruiter.com/c/National-Indoor...\n", - " As a Senior Software Engineer, you will: * Des...\n", + " 70000\n", + " 85000\n", + " USD/CAD\n", + " https://www.ziprecruiter.com/c/Core-Personnel/...\n", + " Mechanical Engineer The Mechanical Engineer as...\n", " \n", " \n", " 29\n", " zip_recruiter\n", - " 2024 Next Gen IT Program | Software Engineerin...\n", - " Southern Glazer's Wine & Spirits\n", - " Dallas\n", - " TX\n", + " Sr. 
SailPoint Engineer\n", + " Capital One\n", + " Plano, TX\n", " None\n", " yearly\n", - " 70000\n", " 0\n", - " https://click.appcast.io/track/hdsbnae?cs=b4&j...\n", - " Finally, through the work assigned, the analys...\n", + " 0\n", + " USD/CAD\n", + " https://dsp.prng.co/_h2Vr5b?clickid=[click_id]\n", + " Sr. SailPoint Engineer Do you want to work for...\n", " \n", " \n", "\n", @@ -491,177 +491,792 @@ ], "text/plain": [ " site title \\\n", - "0 indeed Mental Health Therapist \n", - "1 indeed .NET Developer \n", - "2 indeed Senior Software Engineer \n", - "3 indeed Front End Developer \n", - "4 indeed Software Engineer \n", - "5 indeed Full Stack Developer \n", - "6 indeed Senior Software Engineer \n", - "7 indeed Computer Engineer \n", - "8 indeed Software Engineer \n", - "9 indeed Fpga Engineer \n", - "10 linkedin Software Engineer \n", - "11 linkedin Software Engineer - Early Career \n", - "12 linkedin Software Engineer - Early Career \n", - "13 linkedin Software Engineer - Early Career \n", - "14 linkedin Software Engineer - Early Career \n", - "15 linkedin Software Engineer - Early Career \n", - "16 linkedin Software Engineer - Early Career \n", - "17 linkedin Software Engineer \n", - "18 linkedin Full-Stack Software Engineer \n", - "19 linkedin Software Engineer \n", - "20 zip_recruiter Software Engineer - New Grad \n", - "21 zip_recruiter Full Stack Software Engineer \n", - "22 zip_recruiter Software Developer | Onsite | Omaha, NE - Omaha \n", - "23 zip_recruiter Senior Software Engineer, Onsite [Real-time] \n", - "24 zip_recruiter Senior Software Engineer - TS/SCI **Minimum $2... \n", - "25 zip_recruiter Software Engineer III (full stack, AI/ML, Djan... \n", - "26 zip_recruiter Software Engineer Full Stack \n", - "27 zip_recruiter Embedded Software Engineer (Fort Worth, TX or ... \n", - "28 zip_recruiter Senior Software Engineer (FT) \n", - "29 zip_recruiter 2024 Next Gen IT Program | Software Engineerin... 
\n", + "0 linkedin Manufacturing Engineer / Grand Prairie, TX \n", + "1 linkedin Manufacturing Engineer / Grand Prairie, TX \n", + "2 linkedin Procurement Engineer \n", + "3 linkedin Manufacturing Engineer / Grand Prairie, TX \n", + "4 linkedin Materials & Process Engineer \n", + "5 linkedin Manufacturing Engineering SME/Orlando, FL or D... \n", + "6 linkedin Manufacturing Engineer Staff / Grand Prairie, TX \n", + "7 linkedin Manufacturing Engineer Staff / Grand Prairie, TX \n", + "8 linkedin Life Scientist/Environmental Engineer/Physical... \n", + "9 linkedin Mechanical Design Engineer - Early Career - Da... \n", + "10 indeed Senior Geotechnical Engineer \n", + "11 indeed Field Engineer \n", + "12 indeed Project Manager \n", + "13 indeed Senior Reliability Engineer \n", + "14 indeed Geotechnical Engineer \n", + "15 indeed Campus Recruiter \n", + "16 indeed Field Engineer \n", + "17 indeed Audio Visual Engineer \n", + "18 indeed Junior System Engineer \n", + "19 indeed Process Engineer \n", + "20 zip_recruiter Consultant Engineer II- Boiler and Machinery \n", + "21 zip_recruiter Experienced AV Project Engineer - Dallas, TX \n", + "22 zip_recruiter Electrical Engineer, EIT - MEP \n", + "23 zip_recruiter Sr Mechanical Engineer \n", + "24 zip_recruiter Electrical Engineer, PE - MEP \n", + "25 zip_recruiter Distributed Tracing Engineer (Hybrid) \n", + "26 zip_recruiter Mechanical Engineer - Aerospace Power Systems/... \n", + "27 zip_recruiter Optical Engineer (Road Warrior) \n", + "28 zip_recruiter Mechanical Engineer \n", + "29 zip_recruiter Sr. SailPoint Engineer \n", "\n", - " company_name city state \\\n", - "0 Sandstone Care Broomfield CO \n", - "1 Noir Consulting Irving TX \n", - "2 Johns Hopkins Applied Physics Laboratory (APL) Laurel MD \n", - "3 Verkada San Mateo CA \n", - "4 Adobe San Jose CA \n", - "5 Comcast Philadelphia PA \n", - "6 Smart City Solutions FL \n", - "7 Honeywell None \n", - "8 Fidelity Investments Westlake TX \n", - "9 R-DEX Systems, Inc. 
Atlanta GA \n", - "10 Fieldguide San Francisco CA \n", - "11 Lockheed Martin Sunnyvale CA \n", - "12 Lockheed Martin Edwards CA \n", - "13 Lockheed Martin Fort Worth TX \n", - "14 Lockheed Martin Fort Worth TX \n", - "15 Lockheed Martin Fort Worth TX \n", - "16 Lockheed Martin Fort Worth TX \n", - "17 SpiderOak Austin TX \n", - "18 Rain New York NY \n", - "19 Nike Portland OR \n", - "20 ZipRecruiter Santa Monica CA \n", - "21 ZipRecruiter Phoenix AZ \n", - "22 OneStaff Medical Omaha NE \n", - "23 Raytheon McKinney TX \n", - "24 Raytheon Dallas TX \n", - "25 Ayahealthcare Remote OR \n", - "26 Generac Power Systems Denver CO \n", - "27 Kubota Fort Worth TX \n", - "28 National Indoor RV Center Lewisville TX \n", - "29 Southern Glazer's Wine & Spirits Dallas TX \n", + " company location \\\n", + "0 Lockheed Martin Grand Prairie, TX \n", + "1 Lockheed Martin Grand Prairie, TX \n", + "2 Lockheed Martin Grand Prairie, TX \n", + "3 Lockheed Martin Grand Prairie, TX \n", + "4 Lockheed Martin Grand Prairie, TX \n", + "5 Lockheed Martin Grand Prairie, TX \n", + "6 Lockheed Martin Grand Prairie, TX \n", + "7 Lockheed Martin Grand Prairie, TX \n", + "8 US Environmental Protection Agency (EPA) Dallas, TX \n", + "9 Lockheed Martin Grand Prairie, TX \n", + "10 TSIT Engineering Dallas-Fort Worth, TX, USA \n", + "11 Pipeline Conditioning TX, USA \n", + "12 Patriot Erectors, Inc. Dallas-Fort Worth, TX, USA \n", + "13 Crescent Systems, Inc. Richardson, TX, USA \n", + "14 Midwest Engineering and Testing Corporation Dallas-Fort Worth, TX, USA \n", + "15 Turner Construction Company Dallas, TX, USA \n", + "16 Flatiron Construction Corp Dallas, TX, USA \n", + "17 Integrated Media Technologies, Inc. Dallas, TX, USA \n", + "18 Crescent Systems, Inc. 
Richardson, TX, USA \n", + "19 Andersen Windows & Doors Garland, TX, USA \n", + "20 FM Global Addison, TX \n", + "21 Ford Audio-Video Systems Dallas, TX \n", + "22 Advantage Technical Plano, TX \n", + "23 Staffmark Allen, TX \n", + "24 Advantage Technical Plano, TX \n", + "25 Citi Irving, TX \n", + "26 Systematic Business Consulting Dallas, TX \n", + "27 Glotel Inc Dallas, TX \n", + "28 Core Personnel Dallas, TX \n", + "29 Capital One Plano, TX \n", "\n", - " job_type interval min_amount max_amount \\\n", - "0 fulltime yearly 68000 57500 \n", - "1 None yearly 200000 200000 \n", - "2 None None None None \n", - "3 fulltime yearly 285000 120000 \n", - "4 fulltime yearly 142700 73200 \n", - "5 fulltime yearly 184663 78789 \n", - "6 fulltime yearly 100000 85000 \n", - "7 fulltime None None None \n", - "8 None None None None \n", - "9 fulltime yearly 160000 120000 \n", - "10 fulltime yearly None None \n", - "11 fulltime yearly None None \n", - "12 fulltime yearly None None \n", - "13 fulltime yearly None None \n", - "14 fulltime yearly None None \n", - "15 fulltime yearly None None \n", - "16 fulltime yearly None None \n", - "17 fulltime yearly None None \n", - "18 fulltime yearly None None \n", - "19 contract yearly None None \n", - "20 fulltime yearly 130000 150000 \n", - "21 fulltime yearly 105000 145000 \n", - "22 fulltime yearly 60000 110000 \n", - "23 fulltime yearly 116000 153000 \n", - "24 fulltime yearly 122000 162000 \n", - "25 None yearly 156000 165000 \n", - "26 fulltime yearly 90000 115000 \n", - "27 fulltime yearly 122000 167000 \n", - "28 fulltime yearly 125000 0 \n", - "29 None yearly 70000 0 \n", + " job_type interval min_amount max_amount currency \\\n", + "0 fulltime yearly None None None \n", + "1 fulltime yearly None None None \n", + "2 fulltime yearly None None None \n", + "3 fulltime yearly None None None \n", + "4 fulltime yearly None None None \n", + "5 fulltime yearly None None None \n", + "6 fulltime yearly None None None \n", + "7 fulltime 
yearly None None None \n", + "8 fulltime yearly None None None \n", + "9 parttime yearly None None None \n", + "10 fulltime yearly 160000 150000 USD \n", + "11 fulltime yearly 112000 90000 USD \n", + "12 fulltime yearly -1 80000 USD \n", + "13 fulltime yearly -1 150000 USD \n", + "14 fulltime None None None None \n", + "15 None None None None None \n", + "16 fulltime yearly -1 70000 USD \n", + "17 fulltime yearly 90000 70000 USD \n", + "18 fulltime yearly -1 80000 USD \n", + "19 fulltime None None None None \n", + "20 None yearly 76000 129000 USD/CAD \n", + "21 None yearly 80000 120000 USD/CAD \n", + "22 None yearly 85000 120000 USD/CAD \n", + "23 None yearly 120000 140000 USD/CAD \n", + "24 None yearly 115000 150000 USD/CAD \n", + "25 None yearly 122000 182000 USD/CAD \n", + "26 None yearly 70000 85000 USD/CAD \n", + "27 contract yearly 30 36 USD/CAD \n", + "28 None yearly 70000 85000 USD/CAD \n", + "29 None yearly 0 0 USD/CAD \n", "\n", " job_url \\\n", - "0 https://www.indeed.com/viewjob?jk=f5f33d72e030... \n", - "1 https://www.indeed.com/viewjob?jk=1b22ba65296c... \n", - "2 https://www.indeed.com/viewjob?jk=309eed270a88... \n", - "3 https://www.indeed.com/viewjob?jk=a3ea45daca75... \n", - "4 https://www.indeed.com/viewjob?jk=0f2dc9901fc7... \n", - "5 https://www.indeed.com/viewjob?jk=eb5c927221eb... \n", - "6 https://www.indeed.com/viewjob?jk=ba1945f143a1... \n", - "7 https://www.indeed.com/viewjob?jk=5a1da623ee75... \n", - "8 https://www.indeed.com/viewjob?jk=b600392166bb... \n", - "9 https://www.indeed.com/viewjob?jk=a7e9d356c333... 
\n", - "10 https://www.linkedin.com/jobs/view/3696158160 \n", - "11 https://www.linkedin.com/jobs/view/3693012711 \n", - "12 https://www.linkedin.com/jobs/view/3700669785 \n", - "13 https://www.linkedin.com/jobs/view/3701770659 \n", - "14 https://www.linkedin.com/jobs/view/3701769637 \n", - "15 https://www.linkedin.com/jobs/view/3701772329 \n", - "16 https://www.linkedin.com/jobs/view/3701775201 \n", - "17 https://www.linkedin.com/jobs/view/3707174719 \n", - "18 https://www.linkedin.com/jobs/view/3696158877 \n", - "19 https://www.linkedin.com/jobs/view/3693340247 \n", - "20 https://www.ziprecruiter.com/c/ZipRecruiter/Jo... \n", - "21 https://www.ziprecruiter.com/c/ZipRecruiter/Jo... \n", - "22 https://www.ziprecruiter.com/c/OneStaff-Medica... \n", - "23 https://jsv3.recruitics.com/redirect?rx_cid=34... \n", - "24 https://jsv3.recruitics.com/redirect?rx_cid=34... \n", - "25 https://click.appcast.io/track/hcbh0qq?cs=ngp&... \n", - "26 https://www.ziprecruiter.com/c/Generac-Power-S... \n", - "27 https://us62e2.dayforcehcm.com/CandidatePortal... \n", - "28 https://www.ziprecruiter.com/c/National-Indoor... \n", - "29 https://click.appcast.io/track/hdsbnae?cs=b4&j... \n", + "0 https://www.linkedin.com/jobs/view/3681196520 \n", + "1 https://www.linkedin.com/jobs/view/3641099818 \n", + "2 https://www.linkedin.com/jobs/view/3707147989 \n", + "3 https://www.linkedin.com/jobs/view/3704142926 \n", + "4 https://www.linkedin.com/jobs/view/3706406945 \n", + "5 https://www.linkedin.com/jobs/view/3674491182 \n", + "6 https://www.linkedin.com/jobs/view/3700345185 \n", + "7 https://www.linkedin.com/jobs/view/3693554508 \n", + "8 https://www.linkedin.com/jobs/view/3691767133 \n", + "9 https://www.linkedin.com/jobs/view/3665731446 \n", + "10 https://www.indeed.com/viewjob?jk=7e9ac4cadd48... \n", + "11 https://www.indeed.com/viewjob?jk=634d6fc182df... \n", + "12 https://www.indeed.com/viewjob?jk=6e40c70c7854... \n", + "13 https://www.indeed.com/viewjob?jk=cfda7cdd5f9b... 
\n", + "14 https://www.indeed.com/viewjob?jk=3913ad000636... \n", + "15 https://www.indeed.com/viewjob?jk=b3e88b422df5... \n", + "16 https://www.indeed.com/viewjob?jk=27a8a65d19d7... \n", + "17 https://www.indeed.com/viewjob?jk=004fa9a63978... \n", + "18 https://www.indeed.com/viewjob?jk=ee00fc9dec32... \n", + "19 https://www.indeed.com/viewjob?jk=e25c4602dacb... \n", + "20 https://dsp.prng.co/Y98LJ5b?clickid=[click_id] \n", + "21 https://www.ziprecruiter.com/c/Ford-Audio-Vide... \n", + "22 https://www.ziprecruiter.com/c/Advantage-Techn... \n", + "23 https://www.ziprecruiter.com/c/Staffmark/Job/S... \n", + "24 https://www.ziprecruiter.com/c/Advantage-Techn... \n", + "25 https://dsp.prng.co/_59bQ8b?clickid=[click_id] \n", + "26 https://www.ziprecruiter.com/c/Systematic-Busi... \n", + "27 https://www.ziprecruiter.com/c/Glotel-Inc/Job/... \n", + "28 https://www.ziprecruiter.com/c/Core-Personnel/... \n", + "29 https://dsp.prng.co/_h2Vr5b?clickid=[click_id] \n", "\n", " description \n", - "0 Mental Health Therapist- Broomfield, CO Locati... \n", - "1 .NET Software Engineer, C#, WPF - Irving (Tech... \n", - "2 Description Are you a communications systems d... \n", - "3 Who We Are Verkada is the largest cloud-based ... \n", - "4 Our Company Changing the world through digital... \n", - "5 Make your mark at Comcast - a Fortune 30 globa... \n", - "6 Smart City hiring a full stack software develo... \n", - "7 Join a team recognized for leadership, innovat... \n", - "8 Job Description: Software Engineer in Test The... \n", - "9 Title: Senior DSP/FPGA Firmware Engineer Descr... \n", - "10 About us:Fieldguide is establishing a new stat... \n", - "11 Description:By bringing together people that u... \n", - "12 Description:By bringing together people that u... \n", - "13 Description:By bringing together people that u... \n", - "14 Description:By bringing together people that u... \n", - "15 Description:By bringing together people that u... 
\n", - "16 Description:By bringing together people that u... \n", - "17 We're only as strong as our weakest link.In th... \n", - "18 Rain’s mission is to create the fastest and ea... \n", - "19 Work options: FlexibleWe consider remote, on-p... \n", - "20 Demonstrated foundation in software engineerin... \n", - "21 Experience in client side development using Re... \n", - "22 We are looking for a well-rounded Software Dev... \n", - "23 By joining the Silent Knight team as a Senior ... \n", - "24 Object Oriented Programming using C++ with Lin... \n", - "25 The Software Engineer III will be an integral ... \n", - "26 As a Software Engineer on the Energy Technolog... \n", - "27 Work with a cross-functional team to design, t... \n", - "28 As a Senior Software Engineer, you will: * Des... \n", - "29 Finally, through the work assigned, the analys... " + "0 Duties IncludeThis position requires a self-mo... \n", + "1 Duties IncludeThis position requires a self-mo... \n", + "2 Seeking a Procurement Engineer to join the Pre... \n", + "3 Lockheed Martin is seeking a Manufacturing Eng... \n", + "4 Take your career to a new level and work at an... \n", + "5 **WORK LOCATIONS: ORLANDO, FL OR DALLAS, TX**L... \n", + "6 Lockheed Martin Missiles and Fire Control is s... \n", + "7 Description:Lockheed Martin Missiles and Fire ... \n", + "8 Help Help Requirements Conditions of Employmen... \n", + "9 Description:Lockheed Martin MFC in is seeking ... \n", + "10 Sr. Geotechnical Engineer, PE (Generous Sign-o... \n", + "11 Have you been looking for a job that promotes ... \n", + "12 *Patriot Erectors, Inc; an AISC certified stru... \n", + "13 ABOUT CSI: For the past 27 years, Crescent Sys... \n", + "14 This is a full time in person job located in O... \n", + "15 Assigned to a field operations team to assist ... \n", + "16 Overview: Come join our Dallas-Fort Worth team... \n", + "17 Position Overview The preferred candidate will... \n", + "18 Company Overview: Crescent Systems, Inc. (CSI)... 
\n", + "19 ANDERSEN CORPORATION SUMMARY: We’re focused on... \n", + "20 FM Global is currently seeking a boiler & mach... \n", + "21 The Experienced Project Engineer is an integra... \n", + "22 Electrical Engineer, EIT - MEP Position Locati... \n", + "23 We are seeking a versatile and agile Sr. Desig... \n", + "24 Electrical Engineer, PE - MEP Position Type: D... \n", + "25 ... Engineer to perform software engineering a... \n", + "26 Position Summary The Mechanical Engineer as pa... \n", + "27 Exciting contract/contract to hire role for a ... \n", + "28 Mechanical Engineer The Mechanical Engineer as... \n", + "29 Sr. SailPoint Engineer Do you want to work for... " ] }, - "execution_count": 5, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "scrape_jobs(\n", - " site_name=[\"indeed\", \"linkedin\", \"zip_recruiter\"],\n", - " search_term=\"software engineer\",\n", + "jobs: pd.DataFrame = scrape_jobs(\n", + " site_name=[\"linkedin\", 'indeed', 'zip_recruiter'],\n", + " location='Dallas, TX',\n", + " search_term=\"engineer\",\n", " results_wanted=10\n", - ")" + ")\n", + "display(jobs)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "369048f8-8786-4c00-bf45-f3c3d67863f5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitetitlecompanylocationjob_typeintervalmin_amountmax_amountcurrencyjob_urldescription
0linkedinJunior Mechanical EngineerDanieli GroupfulltimeyearlyNoneNoneNonehttps://www.linkedin.com/jobs/view/3611053535Area of Responsibilities:Individual/or group i...
1linkedinManufacturing Engineer - Electronics/PCBA Asse...Michael PagefulltimeyearlyNoneNoneNonehttps://www.linkedin.com/jobs/view/3648093202Global leader in Aerospace electronics PCBA te...
2linkedinMaintenance Inspection EngineerPERSOLKELLY VietnamfulltimeyearlyNoneNoneNonehttps://www.linkedin.com/jobs/view/3697261989To support the Maintenance Manager undertake t...
3linkedinService Engineer (Beverage Processing Equipment)Robert WaltersfulltimeyearlyNoneNoneNonehttps://www.linkedin.com/jobs/view/3706342013Our client a global manufacturer providing equ...
4linkedinProduction EngineerRobert WaltersfulltimeyearlyNoneNoneNonehttps://www.linkedin.com/jobs/view/3690665746An exciting Production Engineer job has arisen...
5linkedinFinishing Engineer (Raw Materials Development)ArticlefulltimeyearlyNoneNoneNonehttps://www.linkedin.com/jobs/view/3670081257Department: Product (Furniture)Employment Type...
6linkedinQuality Control EngineerKyanon DigitalfulltimeyearlyNoneNoneNonehttps://www.linkedin.com/jobs/view/3581798119Mô tả công việcYou Will Live on the cutting ed...
7linkedinSupplier Quality & Process Development Enginee...Michael PagefulltimeyearlyNoneNoneNonehttps://www.linkedin.com/jobs/view/3673535848Top leading company in sport cycling/e-bike in...
8linkedinPiping EngineerDanieli GroupfulltimeyearlyNoneNoneNonehttps://www.linkedin.com/jobs/view/3667258241Area of Responsibilities: To develop piping ba...
9linkedinChief EngineerColliersfulltimeyearlyNoneNoneNonehttps://www.linkedin.com/jobs/view/3679214061Company DescriptionColliers International (NAS...
10indeedBack End DeveloperBStar SolutionsThành phố Hồ Chí Minh, SG, VietnamNoneNoneNoneNoneNonehttps://vn.indeed.com/viewjob?jk=6add2f6133d9f9b1Research, Design, and Develop highly reliable,...
11indeedEngineerFedEx Express APACThành phố Hồ Chí Minh, SG, VietnamfulltimeNoneNoneNoneNonehttps://vn.indeed.com/viewjob?jk=7603355cdb64978bAir Network Design; Air Network Planning & Imp...
12indeedJunior Software EngineerBaker HughesThành phố Hồ Chí Minh, SG, VietnamfulltimeNoneNoneNoneNonehttps://vn.indeed.com/viewjob?jk=efe63ad2198a8696Are you passionate about being part of a succe...
13indeedTechnical EngineerViệc Làm Công Ty NhậtThành phố Hồ Chí Minh, SG, VietnamNoneNoneNoneNoneNonehttps://vn.indeed.com/viewjob?jk=60b9009eb85c96f3Mô tả công việc Hướng dẫn, đào tạo cho các bạn...
14indeedFull Stack DeveloperLogix TechnologyThành phố Hồ Chí Minh, SG, VietnamNoneNoneNoneNoneNonehttps://vn.indeed.com/viewjob?jk=f2a4c4d9fefff4a2JOB DESCRIPTION Assessing and understanding us...
15indeedFront End DeveloperEXE CORPThành phố Hồ Chí Minh, SG, VietnamNoneNoneNoneNoneNonehttps://vn.indeed.com/viewjob?jk=d62240dfef8e707dMô tả công việc We are seeking a talented Fron...
16indeedAutomation EngineerJabil CircuitThành phố Hồ Chí Minh, SG, VietnamfulltimeNoneNoneNoneNonehttps://vn.indeed.com/viewjob?jk=4a6693f5609e2a37At Jabil we strive to make ANYTHING POSSIBLE a...
17indeedData Science InternAhaMoveThành phố Hồ Chí Minh, SG, VietnamNoneNoneNoneNoneNonehttps://vn.indeed.com/viewjob?jk=e0f1da9b429cf4f9Job Description Collaborate with product, cust...
18indeedFront End DeveloperTrusting SocialThành phố Hồ Chí Minh, SG, VietnamfulltimeNoneNoneNoneNonehttps://vn.indeed.com/viewjob?jk=45b95875f50491f8Mô tả công việc: (Mức lương: Thỏa thuận) Desig...
19indeedSoftware EngineerCông ty Cổ phần Bóng đèn Điện QuangThành phố Hồ Chí Minh, SG, VietnamfulltimeNoneNoneNoneNonehttps://vn.indeed.com/viewjob?jk=836160680fda7520Mô tả công việc: (Mức lương: Thỏa thuận) C, Py...
\n", + "
" + ], + "text/plain": [ + " site title \\\n", + "0 linkedin Junior Mechanical Engineer \n", + "1 linkedin Manufacturing Engineer - Electronics/PCBA Asse... \n", + "2 linkedin Maintenance Inspection Engineer \n", + "3 linkedin Service Engineer (Beverage Processing Equipment) \n", + "4 linkedin Production Engineer \n", + "5 linkedin Finishing Engineer (Raw Materials Development) \n", + "6 linkedin Quality Control Engineer \n", + "7 linkedin Supplier Quality & Process Development Enginee... \n", + "8 linkedin Piping Engineer \n", + "9 linkedin Chief Engineer \n", + "10 indeed Back End Developer \n", + "11 indeed Engineer \n", + "12 indeed Junior Software Engineer \n", + "13 indeed Technical Engineer \n", + "14 indeed Full Stack Developer \n", + "15 indeed Front End Developer \n", + "16 indeed Automation Engineer \n", + "17 indeed Data Science Intern \n", + "18 indeed Front End Developer \n", + "19 indeed Software Engineer \n", + "\n", + " company location \\\n", + "0 Danieli Group \n", + "1 Michael Page \n", + "2 PERSOLKELLY Vietnam \n", + "3 Robert Walters \n", + "4 Robert Walters \n", + "5 Article \n", + "6 Kyanon Digital \n", + "7 Michael Page \n", + "8 Danieli Group \n", + "9 Colliers \n", + "10 BStar Solutions Thành phố Hồ Chí Minh, SG, Vietnam \n", + "11 FedEx Express APAC Thành phố Hồ Chí Minh, SG, Vietnam \n", + "12 Baker Hughes Thành phố Hồ Chí Minh, SG, Vietnam \n", + "13 Việc Làm Công Ty Nhật Thành phố Hồ Chí Minh, SG, Vietnam \n", + "14 Logix Technology Thành phố Hồ Chí Minh, SG, Vietnam \n", + "15 EXE CORP Thành phố Hồ Chí Minh, SG, Vietnam \n", + "16 Jabil Circuit Thành phố Hồ Chí Minh, SG, Vietnam \n", + "17 AhaMove Thành phố Hồ Chí Minh, SG, Vietnam \n", + "18 Trusting Social Thành phố Hồ Chí Minh, SG, Vietnam \n", + "19 Công ty Cổ phần Bóng đèn Điện Quang Thành phố Hồ Chí Minh, SG, Vietnam \n", + "\n", + " job_type interval min_amount max_amount currency \\\n", + "0 fulltime yearly None None None \n", + "1 fulltime yearly None None None \n", + "2 
fulltime yearly None None None \n", + "3 fulltime yearly None None None \n", + "4 fulltime yearly None None None \n", + "5 fulltime yearly None None None \n", + "6 fulltime yearly None None None \n", + "7 fulltime yearly None None None \n", + "8 fulltime yearly None None None \n", + "9 fulltime yearly None None None \n", + "10 None None None None None \n", + "11 fulltime None None None None \n", + "12 fulltime None None None None \n", + "13 None None None None None \n", + "14 None None None None None \n", + "15 None None None None None \n", + "16 fulltime None None None None \n", + "17 None None None None None \n", + "18 fulltime None None None None \n", + "19 fulltime None None None None \n", + "\n", + " job_url \\\n", + "0 https://www.linkedin.com/jobs/view/3611053535 \n", + "1 https://www.linkedin.com/jobs/view/3648093202 \n", + "2 https://www.linkedin.com/jobs/view/3697261989 \n", + "3 https://www.linkedin.com/jobs/view/3706342013 \n", + "4 https://www.linkedin.com/jobs/view/3690665746 \n", + "5 https://www.linkedin.com/jobs/view/3670081257 \n", + "6 https://www.linkedin.com/jobs/view/3581798119 \n", + "7 https://www.linkedin.com/jobs/view/3673535848 \n", + "8 https://www.linkedin.com/jobs/view/3667258241 \n", + "9 https://www.linkedin.com/jobs/view/3679214061 \n", + "10 https://vn.indeed.com/viewjob?jk=6add2f6133d9f9b1 \n", + "11 https://vn.indeed.com/viewjob?jk=7603355cdb64978b \n", + "12 https://vn.indeed.com/viewjob?jk=efe63ad2198a8696 \n", + "13 https://vn.indeed.com/viewjob?jk=60b9009eb85c96f3 \n", + "14 https://vn.indeed.com/viewjob?jk=f2a4c4d9fefff4a2 \n", + "15 https://vn.indeed.com/viewjob?jk=d62240dfef8e707d \n", + "16 https://vn.indeed.com/viewjob?jk=4a6693f5609e2a37 \n", + "17 https://vn.indeed.com/viewjob?jk=e0f1da9b429cf4f9 \n", + "18 https://vn.indeed.com/viewjob?jk=45b95875f50491f8 \n", + "19 https://vn.indeed.com/viewjob?jk=836160680fda7520 \n", + "\n", + " description \n", + "0 Area of Responsibilities:Individual/or group i... 
\n", + "1 Global leader in Aerospace electronics PCBA te... \n", + "2 To support the Maintenance Manager undertake t... \n", + "3 Our client a global manufacturer providing equ... \n", + "4 An exciting Production Engineer job has arisen... \n", + "5 Department: Product (Furniture)Employment Type... \n", + "6 Mô tả công việcYou Will Live on the cutting ed... \n", + "7 Top leading company in sport cycling/e-bike in... \n", + "8 Area of Responsibilities: To develop piping ba... \n", + "9 Company DescriptionColliers International (NAS... \n", + "10 Research, Design, and Develop highly reliable,... \n", + "11 Air Network Design; Air Network Planning & Imp... \n", + "12 Are you passionate about being part of a succe... \n", + "13 Mô tả công việc Hướng dẫn, đào tạo cho các bạn... \n", + "14 JOB DESCRIPTION Assessing and understanding us... \n", + "15 Mô tả công việc We are seeking a talented Fron... \n", + "16 At Jabil we strive to make ANYTHING POSSIBLE a... \n", + "17 Job Description Collaborate with product, cust... \n", + "18 Mô tả công việc: (Mức lương: Thỏa thuận) Desig... \n", + "19 Mô tả công việc: (Mức lương: Thỏa thuận) C, Py... " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "jobs: pd.DataFrame = scrape_jobs(\n", + " site_name=[\"linkedin\", 'indeed', 'zip_recruiter'],\n", + " # LinkedIn searches worldwide, ZipRecruiter is US/CANADA\n", + " location='Ho Chi Minh', \n", + " search_term=\"engineer\",\n", + " results_wanted=10,\n", + "\n", + " # country: only needed for Indeed\n", + " country='vietnam'\n", + ")\n", + "display(jobs)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "15a06086-f054-47ec-bae8-0fa96ce51cf3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitetitlecompanylocationjob_typeintervalmin_amountmax_amountcurrencyjob_urldescription
0linkedinData Scientist (Mid/Sr)SmartDevfulltimeyearlyNoneNoneNonehttps://www.linkedin.com/jobs/view/3709991002Company DescriptionSmartDev is a leading provi...
1linkedinRemote Machine Learning DeveloperScopic SoftwarefulltimeyearlyNoneNoneNonehttps://www.linkedin.com/jobs/view/3697781855Are you on the hunt for exciting new challenge...
2linkedin[Remote] Deep Learning Engineer [A-M]Aloha Consulting GroupfulltimeyearlyNoneNoneNonehttps://www.linkedin.com/jobs/view/3706069538This is a remote position.Foster collaboration...
3linkedinAI EngineerCreative ForcefulltimeyearlyNoneNoneNonehttps://www.linkedin.com/jobs/view/3705326015THE ROLEWe are seeking an experienced AI Engin...
4linkedinApplication EngineerTechSource SystemsfulltimeyearlyNoneNoneNonehttps://www.linkedin.com/jobs/view/3700637050Job SummaryAs an Application Engineer, you wil...
\n", + "
" + ], + "text/plain": [ + " site title company \\\n", + "0 linkedin Data Scientist (Mid/Sr) SmartDev \n", + "1 linkedin Remote Machine Learning Developer Scopic Software \n", + "2 linkedin [Remote] Deep Learning Engineer [A-M] Aloha Consulting Group \n", + "3 linkedin AI Engineer Creative Force \n", + "4 linkedin Application Engineer TechSource Systems \n", + "\n", + " location job_type interval min_amount max_amount currency \\\n", + "0 fulltime yearly None None None \n", + "1 fulltime yearly None None None \n", + "2 fulltime yearly None None None \n", + "3 fulltime yearly None None None \n", + "4 fulltime yearly None None None \n", + "\n", + " job_url \\\n", + "0 https://www.linkedin.com/jobs/view/3709991002 \n", + "1 https://www.linkedin.com/jobs/view/3697781855 \n", + "2 https://www.linkedin.com/jobs/view/3706069538 \n", + "3 https://www.linkedin.com/jobs/view/3705326015 \n", + "4 https://www.linkedin.com/jobs/view/3700637050 \n", + "\n", + " description \n", + "0 Company DescriptionSmartDev is a leading provi... \n", + "1 Are you on the hunt for exciting new challenge... \n", + "2 This is a remote position.Foster collaboration... \n", + "3 THE ROLEWe are seeking an experienced AI Engin... \n", + "4 Job SummaryAs an Application Engineer, you wil... 
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "jobs: pd.DataFrame = scrape_jobs(\n", + " site_name=[\"linkedin\"],\n", + " location=\"Hanoi\",\n", + " search_term=\"data scientist\",\n", + " results_wanted=5,\n", + ")\n", + "display(jobs)" ] } ], @@ -681,7 +1296,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.10.11" } }, "nbformat": 4, diff --git a/README.md b/README.md index 597e276..1213867 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,11 @@ import pandas as pd jobs: pd.DataFrame = scrape_jobs( site_name=["indeed", "linkedin", "zip_recruiter"], search_term="software engineer", - results_wanted=10 + location="Dallas, TX", + results_wanted=10, + + # country: only needed for indeed + country='USA' ) if jobs.empty: @@ -65,8 +69,10 @@ Optional ├── is_remote (bool) ├── results_wanted (int): number of job results to retrieve for each site specified in 'site_type' ├── easy_apply (bool): filters for jobs on LinkedIn that have the 'Easy Apply' option +├── country (enum): uses the corresponding subdomain on Indeed (e.g. Canada on Indeed is ca.indeed.com ``` + ### JobPost Schema ```plaintext JobPost @@ -81,12 +87,91 @@ JobPost ├── job_type (enum) ├── compensation (object) │ ├── interval (CompensationInterval): yearly, monthly, weekly, daily, hourly -│ ├── min_amount (float) -│ ├── max_amount (float) +│ ├── min_amount (int) +│ ├── max_amount (int) │ └── currency (str) └── date_posted (datetime) ``` +## Supported Countries for Job Searching + + +### **LinkedIn** + +LinkedIn searches globally. Use the `location` parameter + +### **ZipRecruiter** + +ZipRecruiter searches for jobs in US/Canada. 
Use the `location` parameter. + + +### **Indeed** +For Indeed, use the `location` parameter along with the `country` parameter. + +You can specify the following countries when searching on Indeed (use the exact name): + +- Argentina +- Australia +- Austria +- Bahrain +- Belgium +- Brazil +- Canada +- Chile +- China +- Colombia +- Costa Rica +- Czech Republic +- Denmark +- Ecuador +- Egypt +- Finland +- France +- Germany +- Greece +- Hong Kong +- Hungary +- India +- Indonesia +- Ireland +- Israel +- Italy +- Japan +- Kuwait +- Luxembourg +- Malaysia +- Mexico +- Morocco +- Netherlands +- New Zealand +- Nigeria +- Norway +- Oman +- Pakistan +- Panama +- Peru +- Philippines +- Poland +- Portugal +- Qatar +- Romania +- Saudi Arabia +- Singapore +- South Africa +- South Korea +- Spain +- Sweden +- Switzerland +- Taiwan +- Thailand +- Turkey +- Ukraine +- United Arab Emirates +- UK +- USA +- Uruguay +- Venezuela +- Vietnam ## Frequently Asked Questions diff --git a/pyproject.toml b/pyproject.toml index 96d5a3e..f9a5c4d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "python-jobspy" -version = "1.0.3" +version = "1.1.0" description = "Job scraper for LinkedIn, Indeed & ZipRecruiter" authors = ["Zachary Hampton ", "Cullen Watson "] readme = "README.md" diff --git a/src/jobspy/__init__.py b/src/jobspy/__init__.py index 9355d43..1b6c68c 100644 --- a/src/jobspy/__init__.py +++ b/src/jobspy/__init__.py @@ -1,15 +1,11 @@ import pandas as pd from typing import List, Tuple -from .jobs import JobType +from .jobs import JobType, Location from .scrapers.indeed import IndeedScraper from .scrapers.ziprecruiter import ZipRecruiterScraper from .scrapers.linkedin import LinkedInScraper -from .scrapers import ( - ScraperInput, - Site, - JobResponse, -) +from .scrapers import ScraperInput, Site, JobResponse, Country SCRAPER_MAPPING = { @@ -32,6 +28,7 @@ def scrape_jobs( job_type: JobType = None, easy_apply: bool = False, # linkedin results_wanted: int = 15, + country: str =
"usa", ) -> pd.DataFrame: """ Asynchronously scrapes job data from multiple job sites. @@ -41,9 +38,12 @@ def scrape_jobs( if type(site_name) == str: site_name = _map_str_to_site(site_name) + country_enum = Country.from_string(country) + site_type = [site_name] if type(site_name) == Site else site_name scraper_input = ScraperInput( site_type=site_type, + country=country_enum, search_term=search_term, location=location, distance=distance, @@ -71,22 +71,15 @@ def scrape_jobs( for job in job_response.jobs: data = job.dict() data["site"] = site - - # Formatting JobType - data["job_type"] = data["job_type"].value if data["job_type"] else None - - # Formatting Location - location_obj = data.get("location") - if location_obj and isinstance(location_obj, dict): - data["city"] = location_obj.get("city", "") - data["state"] = location_obj.get("state", "") - data["country"] = location_obj.get("country", "USA") + data["company"] = data["company_name"] + if data["job_type"]: + # Take the first value from the job type tuple + data["job_type"] = data["job_type"].value[0] else: - data["city"] = None - data["state"] = None - data["country"] = None + data["job_type"] = None + + data["location"] = Location(**data["location"]).display_location() - # Formatting Compensation compensation_obj = data.get("compensation") if compensation_obj and isinstance(compensation_obj, dict): data["interval"] = ( @@ -111,13 +104,13 @@ def scrape_jobs( desired_order = [ "site", "title", - "company_name", - "city", - "state", + "company", + "location", "job_type", "interval", "min_amount", "max_amount", + "currency", "job_url", "description", ] diff --git a/src/jobspy/jobs/__init__.py b/src/jobspy/jobs/__init__.py index 4c1bc65..ebc2e1f 100644 --- a/src/jobspy/jobs/__init__.py +++ b/src/jobspy/jobs/__init__.py @@ -6,24 +6,160 @@ from pydantic import BaseModel, validator class JobType(Enum): - FULL_TIME = "fulltime" - PART_TIME = "parttime" - CONTRACT = "contract" - TEMPORARY = "temporary" - INTERNSHIP = 
"internship" + FULL_TIME = ( + "fulltime", + "períodointegral", + "estágio/trainee", + "cunormăîntreagă", + "tiempocompleto", + "vollzeit", + "voltijds", + "tempointegral", + "全职", + "plnýúvazek", + "fuldtid", + "دوامكامل", + "kokopäivätyö", + "tempsplein", + "vollzeit", + "πλήρηςαπασχόληση", + "teljesmunkaidő", + "tempopieno", + "tempsplein", + "heltid", + "jornadacompleta", + "pełnyetat", + "정규직", + "100%", + "全職", + "งานประจำ", + "tamzamanlı", + "повназайнятість", + "toànthờigian", + ) + PART_TIME = ("parttime", "teilzeit") + CONTRACT = ("contract", "contractor") + TEMPORARY = ("temporary",) + INTERNSHIP = ("internship", "prácticas", "ojt(onthejobtraining)", "praktikum") - PER_DIEM = "perdiem" - NIGHTS = "nights" - OTHER = "other" - SUMMER = "summer" - VOLUNTEER = "volunteer" + PER_DIEM = ("perdiem",) + NIGHTS = ("nights",) + OTHER = ("other",) + SUMMER = ("summer",) + VOLUNTEER = ("volunteer",) + + +class Country(Enum): + ARGENTINA = ("argentina", "ar") + AUSTRALIA = ("australia", "au") + AUSTRIA = ("austria", "at") + BAHRAIN = ("bahrain", "bh") + BELGIUM = ("belgium", "be") + BRAZIL = ("brazil", "br") + CANADA = ("canada", "ca") + CHILE = ("chile", "cl") + CHINA = ("china", "cn") + COLOMBIA = ("colombia", "co") + COSTARICA = ("costa rica", "cr") + CZECHREPUBLIC = ("czech republic", "cz") + DENMARK = ("denmark", "dk") + ECUADOR = ("ecuador", "ec") + EGYPT = ("egypt", "eg") + FINLAND = ("finland", "fi") + FRANCE = ("france", "fr") + GERMANY = ("germany", "de") + GREECE = ("greece", "gr") + HONGKONG = ("hong kong", "hk") + HUNGARY = ("hungary", "hu") + INDIA = ("india", "in") + INDONESIA = ("indonesia", "id") + IRELAND = ("ireland", "ie") + ISRAEL = ("israel", "il") + ITALY = ("italy", "it") + JAPAN = ("japan", "jp") + KUWAIT = ("kuwait", "kw") + LUXEMBOURG = ("luxembourg", "lu") + MALAYSIA = ("malaysia", "malaysia") + MEXICO = ("mexico", "mx") + MOROCCO = ("morocco", "ma") + NETHERLANDS = ("netherlands", "nl") + NEWZEALAND = ("new zealand", "nz") + NIGERIA = 
("nigeria", "ng") + NORWAY = ("norway", "no") + OMAN = ("oman", "om") + PAKISTAN = ("pakistan", "pk") + PANAMA = ("panama", "pa") + PERU = ("peru", "pe") + PHILIPPINES = ("philippines", "ph") + POLAND = ("poland", "pl") + PORTUGAL = ("portugal", "pt") + QATAR = ("qatar", "qa") + ROMANIA = ("romania", "ro") + SAUDIARABIA = ("saudi arabia", "sa") + SINGAPORE = ("singapore", "sg") + SOUTHAFRICA = ("south africa", "za") + SOUTHKOREA = ("south korea", "kr") + SPAIN = ("spain", "es") + SWEDEN = ("sweden", "se") + SWITZERLAND = ("switzerland", "ch") + TAIWAN = ("taiwan", "tw") + THAILAND = ("thailand", "th") + TURKEY = ("turkey", "tr") + UKRAINE = ("ukraine", "ua") + UNITEDARABEMIRATES = ("united arab emirates", "ae") + UK = ("uk", "uk") + USA = ("usa", "www") + URUGUAY = ("uruguay", "uy") + VENEZUELA = ("venezuela", "ve") + VIETNAM = ("vietnam", "vn") + + # internal for ziprecruiter + US_CANADA = ("usa/ca", "www") + + # internal for linkeind + WORLDWIDE = ("worldwide", "www") + + def __new__(cls, country, domain): + obj = object.__new__(cls) + obj._value_ = country + obj.domain = domain + return obj + + @property + def domain_value(self): + return self.domain + + @classmethod + def from_string(cls, country_str: str): + """Convert a string to the corresponding Country enum.""" + country_str = country_str.strip().lower() + for country in cls: + if country.value == country_str: + return country + valid_countries = [country.value for country in cls] + raise ValueError( + f"Invalid country string: '{country_str}'. 
Valid countries (only include this param for Indeed) are: {', '.join(valid_countries)}" + ) class Location(BaseModel): - country: str = "USA" - city: str = None + country: Country = None + city: Optional[str] = None state: Optional[str] = None + def display_location(self) -> str: + location_parts = [] + if self.city: + location_parts.append(self.city) + if self.state: + location_parts.append(self.state) + if self.country and self.country not in (Country.US_CANADA, Country.WORLDWIDE): + if self.country.value in ("usa", "uk"): + location_parts.append(self.country.value.upper()) + else: + location_parts.append(self.country.value.title()) + return ", ".join(location_parts) + class CompensationInterval(Enum): YEARLY = "yearly" @@ -37,7 +173,7 @@ class Compensation(BaseModel): interval: CompensationInterval min_amount: int = None max_amount: int = None - currency: str = "USD" + currency: Optional[str] = "USD" class JobPost(BaseModel): diff --git a/src/jobspy/scrapers/__init__.py b/src/jobspy/scrapers/__init__.py index b2a8900..ba0246e 100644 --- a/src/jobspy/scrapers/__init__.py +++ b/src/jobspy/scrapers/__init__.py @@ -1,4 +1,4 @@ -from ..jobs import Enum, BaseModel, JobType, JobResponse +from ..jobs import Enum, BaseModel, JobType, JobResponse, Country from typing import List, Optional, Any @@ -18,6 +18,7 @@ class ScraperInput(BaseModel): search_term: str location: str = None + country: Optional[Country] = Country.USA distance: Optional[int] = None is_remote: bool = False job_type: Optional[JobType] = None @@ -35,9 +36,8 @@ class CommonResponse(BaseModel): class Scraper: - def __init__(self, site: Site, url: str): + def __init__(self, site: Site): self.site = site - self.url = url def scrape(self, scraper_input: ScraperInput) -> JobResponse: ... 
diff --git a/src/jobspy/scrapers/indeed/__init__.py b/src/jobspy/scrapers/indeed/__init__.py index 4e20977..70df33b 100644 --- a/src/jobspy/scrapers/indeed/__init__.py +++ b/src/jobspy/scrapers/indeed/__init__.py @@ -1,6 +1,8 @@ import re import math +import io import json +import traceback from datetime import datetime from typing import Optional @@ -18,7 +20,7 @@ from ...jobs import ( JobResponse, JobType, ) -from .. import Scraper, ScraperInput, Site, StatusException +from .. import Scraper, ScraperInput, Site, Country, StatusException class ParsingException(Exception): @@ -31,8 +33,7 @@ class IndeedScraper(Scraper): Initializes IndeedScraper with the Indeed job search url """ site = Site(Site.INDEED) - url = "https://www.indeed.com" - super().__init__(site, url) + super().__init__(site) self.jobs_per_page = 15 self.seen_urls = set() @@ -47,16 +48,21 @@ class IndeedScraper(Scraper): :param session: :return: jobs found on page, total number of jobs found for search """ + self.country = scraper_input.country + domain = self.country.domain_value + self.url = f"https://{domain}.indeed.com" job_list = [] params = { "q": scraper_input.search_term, "l": scraper_input.location, - "radius": scraper_input.distance, "filter": 0, "start": 0 + page * 10, } + if scraper_input.distance: + params["radius"] = scraper_input.distance + sc_values = [] if scraper_input.is_remote: sc_values.append("attr(DSQF7)") @@ -65,12 +71,15 @@ class IndeedScraper(Scraper): if sc_values: params["sc"] = "0kf:" + "".join(sc_values) + ";" - response = session.get(self.url + "/jobs", params=params) + response = session.get(self.url + "/jobs", params=params, allow_redirects=True) + # print(response.status_code) - if response.status_code != 200 and response.status_code != 307: + if response.status_code not in range(200, 400): raise StatusException(response.status_code) soup = BeautifulSoup(response.content, "html.parser") + with open("text2.html", "w", encoding="utf-8") as f: + f.write(str(soup)) if 
"did not match any jobs" in str(soup): raise ParsingException("Search did not match any jobs") @@ -92,8 +101,6 @@ class IndeedScraper(Scraper): if job_url in self.seen_urls: return None - snippet_html = BeautifulSoup(job["snippet"], "html.parser") - extracted_salary = job.get("extractedSalary") compensation = None if extracted_salary: @@ -118,11 +125,12 @@ class IndeedScraper(Scraper): date_posted = date_posted.strftime("%Y-%m-%d") description = self.get_description(job_url, session) - li_elements = snippet_html.find_all("li") - if description is None and li_elements: - description = " ".join(li.text for li in li_elements) + with io.StringIO(job["snippet"]) as f: + soup = BeautifulSoup(f, "html.parser") + li_elements = soup.find_all("li") + if description is None and li_elements: + description = " ".join(li.text for li in li_elements) - first_li = snippet_html.find("li") job_post = JobPost( title=job["normTitle"], description=description, @@ -130,6 +138,7 @@ class IndeedScraper(Scraper): location=Location( city=job.get("jobLocationCity"), state=job.get("jobLocationState"), + country=self.country, ), job_type=job_type, compensation=compensation, @@ -138,7 +147,7 @@ class IndeedScraper(Scraper): ) return job_post - with ThreadPoolExecutor(max_workers=10) as executor: + with ThreadPoolExecutor(max_workers=1) as executor: job_results: list[Future] = [ executor.submit(process_job, job) for job in jobs["metaData"]["mosaicProviderJobCardsModel"]["results"] @@ -166,7 +175,7 @@ class IndeedScraper(Scraper): #: get first page to initialize session job_list, total_results = self.scrape_page(scraper_input, 0, session) - with ThreadPoolExecutor(max_workers=10) as executor: + with ThreadPoolExecutor(max_workers=1) as executor: futures: list[Future] = [ executor.submit(self.scrape_page, scraper_input, page, session) for page in range(1, pages_to_process + 1) @@ -188,6 +197,7 @@ class IndeedScraper(Scraper): error=f"Indeed failed to parse response: {e}", ) except Exception as e: + 
print(f"LinkedIn failed to scrape: {e}\n{traceback.format_exc()}") return JobResponse( success=False, error=f"Indeed failed to scrape: {e}", @@ -215,17 +225,25 @@ class IndeedScraper(Scraper): jk_value = params.get("jk", [None])[0] formatted_url = f"{self.url}/viewjob?jk={jk_value}&spa=1" - response = session.get(formatted_url, allow_redirects=True) + try: + response = session.get( + formatted_url, allow_redirects=True, timeout_seconds=5 + ) + except requests.exceptions.Timeout: + print("The request timed out.") + return None if response.status_code not in range(200, 400): + print("status code not in range") return None raw_description = response.json()["body"]["jobInfoWrapperModel"][ "jobInfoModel" ]["sanitizedJobDescription"] - soup = BeautifulSoup(raw_description, "html.parser") - text_content = " ".join(soup.get_text().split()).strip() - return text_content + with io.StringIO(raw_description) as f: + soup = BeautifulSoup(f, "html.parser") + text_content = " ".join(soup.get_text().split()).strip() + return text_content @staticmethod def get_job_type(job: dict) -> Optional[JobType]: @@ -237,13 +255,18 @@ class IndeedScraper(Scraper): for taxonomy in job["taxonomyAttributes"]: if taxonomy["label"] == "job-types": if len(taxonomy["attributes"]) > 0: - job_type_str = ( - taxonomy["attributes"][0]["label"] - .replace("-", "_") - .replace(" ", "_") - .upper() - ) - return JobType[job_type_str] + label = taxonomy["attributes"][0].get("label") + if label: + job_type_str = label.replace("-", "").replace(" ", "").lower() + # print(f"Debug: job_type_str = {job_type_str}") + return IndeedScraper.get_enum_from_value(job_type_str) + return None + + @staticmethod + def get_enum_from_value(value_str): + for job_type in JobType: + if value_str in job_type.value: + return job_type return None @staticmethod @@ -294,7 +317,7 @@ class IndeedScraper(Scraper): :param soup: :return: total_num_jobs """ - script = soup.find("script", string=lambda t: "window._initialData" in t) + script 
= soup.find("script", string=lambda t: t and "window._initialData" in t) pattern = re.compile(r"window._initialData\s*=\s*({.*})\s*;", re.DOTALL) match = pattern.search(script.string) diff --git a/src/jobspy/scrapers/linkedin/__init__.py b/src/jobspy/scrapers/linkedin/__init__.py index e4e8bc9..03ca40f 100644 --- a/src/jobspy/scrapers/linkedin/__init__.py +++ b/src/jobspy/scrapers/linkedin/__init__.py @@ -22,8 +22,8 @@ class LinkedInScraper(Scraper): Initializes LinkedInScraper with the LinkedIn job search url """ site = Site(Site.LINKEDIN) - url = "https://www.linkedin.com" - super().__init__(site, url) + self.url = "https://www.linkedin.com" + super().__init__(site) def scrape(self, scraper_input: ScraperInput) -> JobResponse: """ @@ -31,6 +31,7 @@ class LinkedInScraper(Scraper): :param scraper_input: :return: job_response """ + self.country = "worldwide" job_list: list[JobPost] = [] seen_urls = set() page, processed_jobs, job_count = 0, 0, 0 @@ -104,7 +105,7 @@ class LinkedInScraper(Scraper): metadata_card = job_info.find( "div", class_="base-search-card__metadata" ) - location: Location = LinkedInScraper.get_location(metadata_card) + location: Location = self.get_location(metadata_card) datetime_tag = metadata_card.find( "time", class_="job-search-card__listdate" @@ -125,7 +126,7 @@ class LinkedInScraper(Scraper): job_url=job_url, job_type=job_type, compensation=Compensation( - interval=CompensationInterval.YEARLY, currency="USD" + interval=CompensationInterval.YEARLY, currency=None ), ) job_list.append(job_post) @@ -195,17 +196,24 @@ class LinkedInScraper(Scraper): employment_type = employment_type.lower() employment_type = employment_type.replace("-", "") - return JobType(employment_type) + return LinkedInScraper.get_enum_from_value(employment_type) return text_content, get_job_type(soup) @staticmethod - def get_location(metadata_card: Optional[Tag]) -> Location: + def get_enum_from_value(value_str): + for job_type in JobType: + if value_str in 
job_type.value: + return job_type + return None + + def get_location(self, metadata_card: Optional[Tag]) -> Location: """ Extracts the location data from the job metadata card. :param metadata_card :return: location """ + location = Location(country=self.country) if metadata_card is not None: location_tag = metadata_card.find( "span", class_="job-search-card__location" @@ -217,6 +225,7 @@ class LinkedInScraper(Scraper): location = Location( city=city, state=state, + country=self.country, ) return location diff --git a/src/jobspy/scrapers/ziprecruiter/__init__.py b/src/jobspy/scrapers/ziprecruiter/__init__.py index eb35686..881b123 100644 --- a/src/jobspy/scrapers/ziprecruiter/__init__.py +++ b/src/jobspy/scrapers/ziprecruiter/__init__.py @@ -1,6 +1,7 @@ import math import json import re +import traceback from datetime import datetime from typing import Optional, Tuple from urllib.parse import urlparse, parse_qs @@ -18,6 +19,7 @@ from ...jobs import ( Location, JobResponse, JobType, + Country, ) @@ -27,8 +29,8 @@ class ZipRecruiterScraper(Scraper): Initializes LinkedInScraper with the ZipRecruiter job search url """ site = Site(Site.ZIP_RECRUITER) - url = "https://www.ziprecruiter.com" - super().__init__(site, url) + self.url = "https://www.ziprecruiter.com" + super().__init__(site) self.jobs_per_page = 20 self.seen_urls = set() @@ -80,8 +82,10 @@ class ZipRecruiterScraper(Scraper): self.url + "/jobs-search", headers=ZipRecruiterScraper.headers(), params=params, + allow_redirects=True, ) + # print(response.status_code) if response.status_code != 200: raise StatusException(response.status_code) @@ -144,6 +148,7 @@ class ZipRecruiterScraper(Scraper): error=f"ZipRecruiter returned status code {e.status_code}", ) except Exception as e: + print(f"ZipRecruiter failed to scrape: {e}\n{traceback.format_exc()}") return JobResponse( success=False, error=f"ZipRecruiter failed to scrape: {e}", @@ -181,15 +186,12 @@ class ZipRecruiterScraper(Scraper): description = job.find("p", 
{"class": "job_snippet"}).text.strip() job_type_element = job.find("li", {"class": "perk_item perk_type"}) + job_type = None if job_type_element: job_type_text = ( job_type_element.text.strip().lower().replace("-", "").replace(" ", "") ) - if job_type_text == "contractor": - job_type_text = "contract" - job_type = JobType(job_type_text) - else: - job_type = None + job_type = ZipRecruiterScraper.get_job_type_enum(job_type_text) date_posted = ZipRecruiterScraper.get_date_posted(job) @@ -206,16 +208,17 @@ class ZipRecruiterScraper(Scraper): return job_post def process_job_js(self, job: dict) -> JobPost: - # Map the job data to the expected fields by the Pydantic model title = job.get("Title") description = BeautifulSoup( job.get("Snippet", "").strip(), "html.parser" ).get_text() company = job.get("OrgName") - location = Location(city=job.get("City"), state=job.get("State")) + location = Location( + city=job.get("City"), state=job.get("State"), country=Country.US_CANADA + ) try: - job_type = ZipRecruiterScraper.job_type_from_string( + job_type = ZipRecruiterScraper.get_job_type_enum( job.get("EmploymentType", "").replace("-", "_").lower() ) except ValueError: @@ -244,6 +247,7 @@ class ZipRecruiterScraper(Scraper): interval=CompensationInterval.YEARLY, min_amount=min_amount, max_amount=max_amount, + currency="USD/CAD", ) save_job_url = job.get("SaveJobURL", "") posted_time_match = re.search( @@ -270,17 +274,18 @@ class ZipRecruiterScraper(Scraper): return job_post @staticmethod - def job_type_from_string(value: str) -> Optional[JobType]: - if not value: - return None + def get_enum_from_value(value_str): + for job_type in JobType: + if value_str in job_type.value: + return job_type + return None - if value.lower() == "contractor": - value = "contract" - normalized_value = value.replace("_", "") - for item in JobType: - if item.value == normalized_value: - return item - raise ValueError(f"Invalid value for JobType: {value}") + @staticmethod + def 
get_job_type_enum(job_type_str: str) -> Optional[JobType]: + for job_type in JobType: + if job_type_str in job_type.value: + return job_type + return None def get_description(self, job_page_url: str) -> Tuple[Optional[str], Optional[str]]: """ @@ -289,11 +294,16 @@ class ZipRecruiterScraper(Scraper): :param session: :return: description or None, response url """ - response = self.session.get( - job_page_url, headers=ZipRecruiterScraper.headers(), allow_redirects=True - ) - if response.status_code not in range(200, 400): - return None, None + try: + response = self.session.get( + job_page_url, + headers=ZipRecruiterScraper.headers(), + allow_redirects=True, + timeout_seconds=5, + ) + except requests.exceptions.Timeout: + print("The request timed out.") + return None html_string = response.content soup_job = BeautifulSoup(html_string, "html.parser") @@ -375,7 +385,10 @@ class ZipRecruiterScraper(Scraper): amounts.append(amount) compensation = Compensation( - interval=interval, min_amount=min(amounts), max_amount=max(amounts) + interval=interval, + min_amount=min(amounts), + max_amount=max(amounts), + currency="USD/CAD", ) return compensation @@ -399,10 +412,7 @@ class ZipRecruiterScraper(Scraper): city, state = None, None else: city, state = None, None - return Location( - city=city, - state=state, - ) + return Location(city=city, state=state, country=Country.US_CANADA) @staticmethod def headers() -> dict: