From 6d2cdcf813cb295f250c1a7338521e8afc35095d Mon Sep 17 00:00:00 2001 From: djv Date: Sun, 1 Oct 2023 12:13:25 +0800 Subject: [PATCH] only keep indeed and add location.json --- examples/JobSpy_Demo.py | 51 +- examples/location.json | 3838 ++++++++++++++++++ src/jobspy/__init__.py | 16 +- src/jobspy/scrapers/exceptions.py | 8 - src/jobspy/scrapers/linkedin/__init__.py | 271 -- src/jobspy/scrapers/ziprecruiter/__init__.py | 474 --- src/tests/__init__.py | 0 src/tests/test_all.py | 12 - src/tests/test_indeed.py | 10 - src/tests/test_linkedin.py | 10 - src/tests/test_ziprecruiter.py | 11 - 11 files changed, 3869 insertions(+), 832 deletions(-) create mode 100644 examples/location.json delete mode 100644 src/jobspy/scrapers/linkedin/__init__.py delete mode 100644 src/jobspy/scrapers/ziprecruiter/__init__.py delete mode 100644 src/tests/__init__.py delete mode 100644 src/tests/test_all.py delete mode 100644 src/tests/test_indeed.py delete mode 100644 src/tests/test_linkedin.py delete mode 100644 src/tests/test_ziprecruiter.py diff --git a/examples/JobSpy_Demo.py b/examples/JobSpy_Demo.py index 9b2bfc0..1bf382a 100644 --- a/examples/JobSpy_Demo.py +++ b/examples/JobSpy_Demo.py @@ -1,17 +1,16 @@ +import json +import os + from jobspy import scrape_jobs import pandas as pd -jobs: pd.DataFrame = scrape_jobs( - # site_name=["indeed", "linkedin", "zip_recruiter"], - site_name=["indeed"], - search_term="software engineer", - location="Dallas, TX", - results_wanted=20, # be wary the higher it is, the more likey you'll get blocked (rotating proxy should work tho) - country_indeed='USA', - # offset=25 # start jobs from an offset (use if search failed and want to continue) - # proxy="http://34.120.172.140:8123", - proxy="http://crawler-gost-proxy.jobright-internal.com:8080", -) + +# load location list +def read_location_list(location_file): + with open(location_file) as f: + location_list = [location['name'] for location in json.load(f)] + return location_list + # formatting for pandas pd.set_option('display.max_columns', None) @@ -19,15 +18,23 @@ pd.set_option('display.max_rows', None) pd.set_option('display.width', None) pd.set_option('display.max_colwidth', 50) # set to 0 to see full job url / desc -# 1: output to console -print(jobs) +# fetch jobs for each location +locations = read_location_list('location.json') +for location in locations: + jobs: pd.DataFrame = scrape_jobs( + # site_name=["indeed", "linkedin", "zip_recruiter"], + site_name=["indeed"], + search_term="software engineer", + location=location, + results_wanted=30, + # be wary the higher it is, the more likey you'll get blocked (rotating proxy should work tho) + country_indeed='USA', + # offset=25 # start jobs from an offset (use if search failed and want to continue) + proxy="http://34.120.172.140:8123", + # proxy="http://crawler-gost-proxy.jobright-internal.com:8080", + ) -# 2: output to .csv -jobs.to_csv('./jobs.csv', index=False) -print('outputted to jobs.csv') - -# 3: output to .xlsx -# jobs.to_xlsx('jobs.xlsx', index=False) - -# 4: display in Jupyter Notebook (1. pip install jupyter 2. jupyter notebook) -# display(jobs) + if os.path.isfile('./jobs.csv'): + jobs.to_csv('./jobs.csv', index=False, mode='a', header=False) + else: + jobs.to_csv('./jobs.csv', index=False, mode='a', header=True) diff --git a/examples/location.json b/examples/location.json new file mode 100644 index 0000000..cf48d5e --- /dev/null +++ b/examples/location.json @@ -0,0 +1,3838 @@ +[ + { + "name": "new york,NY", + "id": "102571732" + }, + { + "name": "houston,TX", + "id": "103743442" + }, + { + "name": "chicago,IL", + "id": "103112676" + }, + { + "name": "brooklyn,NY", + "id": "104361728" + }, + { + "name": "los angeles,CA", + "id": "102448103" + }, + { + "name": "miami,FL", + "id": "102394087" + }, + { + "name": "san antonio,TX", + "id": "102396835" + }, + { + "name": "philadelphia,PA", + "id": "104937023" + }, + { + "name": "las vegas,NV", + "id": "100293800" + }, + { + "name": "bronx,NY", + "id": "106081027" + }, + { + "name": "phoenix,AZ", + "id": "100219842" + }, + { + "name": "dallas,TX", + "id": "104194190" + }, + { + "name": "san diego,CA", + "id": "103918656" + }, + { + "name": "minneapolis,MN", + "id": "103039849" + }, + { + "name": "san jose,CA", + "id": "106233382" + }, + { + "name": "denver,CO", + "id": "103736294" + }, + { + "name": "austin,TX", + "id": "104472865" + }, + { + "name": "st. louis,MO", + "id": "104428936" + }, + { + "name": "indianapolis,IN", + "id": "100871315" + }, + { + "name": "atlanta,GA", + "id": "106224388" + }, + { + "name": "tucson,AZ", + "id": "103752383" + }, + { + "name": "orlando,FL", + "id": "105142029" + }, + { + "name": "portland,OR", + "id": "104727230" + }, + { + "name": "seattle,WA", + "id": "104116203" + }, + { + "name": "san francisco,CA", + "id": "102277331" + }, + { + "name": "fort worth,TX", + "id": "100432370" + }, + { + "name": "jacksonville,FL", + "id": "100868799" + }, + { + "name": "milwaukee,WI", + "id": "105240372" + }, + { + "name": "cincinnati,OH", + "id": "106310628" + }, + { + "name": "charlotte,NC", + "id": "102264677" + }, + { + "name": "columbus,OH", + "id": "102812094" + }, + { + "name": "cleveland,OH", + "id": "102356711" + }, + { + "name": "fort lauderdale,FL", + "id": "105858804" + }, + { + "name": "sacramento,CA", + "id": "101857797" + }, + { + "name": "saint paul,MN", + "id": "102370339" + }, + { + "name": "el paso,TX", + "id": "102994360" + }, + { + "name": "louisville,KY", + "id": "108449684" + }, + { + "name": "tampa,FL", + "id": "105517665" + }, + { + "name": "memphis,TN", + "id": "100420597" + }, + { + "name": "pittsburgh,PA", + "id": "101351937" + }, + { + "name": "detroit,MI", + "id": "103624908" + }, + { + "name": "albuquerque,NM", + "id": "104055874" + }, + { + "name": "oklahoma city,OK", + "id": "101060990" + }, + { + "name": "washington,DC", + "id": "104383890" + }, + { + "name": "fresno,CA", + "id": "101272817" + }, + { + "name": "buffalo,NY", + "id": "103676418" + }, + { + "name": "kansas city,MO", + "id": "100868311" + }, + { + "name": "colorado springs,CO", + "id": "100182490" + }, + { + "name": "bakersfield,CA", + "id": "103987799" + }, + { + "name": "omaha,NE", + "id": "100739428" + }, + { + "name": "birmingham,AL", + "id": "102905961" + }, + { + "name": "raleigh,NC", + "id": "100197101" + }, + { + "name": "dayton,OH", + "id": "102636075" + }, + { + "name": "mesa,AZ", + "id": "100558512" + }, + { + "name": "rochester,NY", + "id": "106553046" + }, + { + "name": "long beach,CA", + "id": "106417510" + }, + { + "name": "staten island,NY", + "id": "107093877" + }, + { + "name": "salt lake city,UT", + "id": "103250458" + }, + { + "name": "virginia beach,VA", + "id": "106468467" + }, + { + "name": "nashville,TN", + "id": "105573479" + }, + { + "name": "pompano beach,FL", + "id": "105375326" + }, + { + "name": "hollywood,FL", + "id": "100035604" + }, + { + "name": "riverside,CA", + "id": "101807624" + }, + { + "name": "tulsa,OK", + "id": "101891230" + }, + { + "name": "wichita,KS", + "id": "100652883" + }, + { + "name": "honolulu,HI", + "id": "105879727" + }, + { + "name": "knoxville,TN", + "id": "104362759" + }, + { + "name": "aurora,CO", + "id": "106606877" + }, + { + "name": "new orleans,LA", + "id": "106689237" + }, + { + "name": "oakland,CA", + "id": "105883676" + }, + { + "name": "baton rouge,LA", + "id": "105831158" + }, + { + "name": "arlington,TX", + "id": "101920004" + }, + { + "name": "richmond,VA", + "id": "102114334" + }, + { + "name": "stockton,CA", + "id": "101412965" + }, + { + "name": "anaheim,CA", + "id": "103593861" + }, + { + "name": "grand rapids,MI", + "id": "100061294" + }, + { + "name": "santa ana,CA", + "id": "104575067" + }, + { + "name": "hialeah,FL", + "id": "105027134" + }, + { + "name": "spokane,WA", + "id": "106141823" + }, + { + "name": "saint petersburg,FL", + "id": "101759449" + }, + { + "name": "west palm beach,FL", + "id": "102574077" + }, + { + "name": "tacoma,WA", + "id": "104976816" + }, + { + "name": "toledo,OH", + "id": "105205203" + }, + { + "name": "spring,TX", + "id": "103029807" + }, + { + "name": "corpus christi,TX", + "id": "105385420" + }, + { + "name": "fort wayne,IN", + "id": "106502857" + }, + { + "name": "greensboro,NC", + "id": "106414689" + }, + { + "name": "littleton,CO", + "id": "101539691" + }, + { + "name": "marietta,GA", + "id": "102746040" + }, + { + "name": "vancouver,WA", + "id": "100919703" + }, + { + "name": "lexington,KY", + "id": "114499923" + }, + { + "name": "silver spring,MD", + "id": "106026178" + }, + { + "name": "glendale,AZ", + "id": "100820980" + }, + { + "name": "plano,TX", + "id": "100517351" + }, + { + "name": "newark,NJ", + "id": "103913444" + }, + { + "name": "reno,NV", + "id": "106693758" + }, + { + "name": "naples,FL", + "id": "106919338" + }, + { + "name": "tallahassee,FL", + "id": "103826715" + }, + { + "name": "madison,WI", + "id": "106816259" + }, + { + "name": "akron,OH", + "id": "100751619" + }, + { + "name": "mobile,AL", + "id": "102725182" + }, + { + "name": "scottsdale,AZ", + "id": "100855814" + }, + { + "name": "lincoln,NE", + "id": "106469669" + }, + { + "name": "henderson,NV", + "id": "104093424" + }, + { + "name": "durham,NC", + "id": "101915197" + }, + { + "name": "chandler,AZ", + "id": "106777968" + }, + { + "name": "modesto,CA", + "id": "101671045" + }, + { + "name": "pensacola,FL", + "id": "100939624" + }, + { + "name": "lubbock,TX", + "id": "104916159" + }, + { + "name": "katy,TX", + "id": "100015543" + }, + { + "name": "winston-salem,NC", + "id": "105231863" + }, + { + "name": "salem,OR", + "id": "102572422" + }, + { + "name": "laredo,TX", + "id": "102139061" + }, + { + "name": "jersey city,NJ", + "id": "102340689" + }, + { + "name": "norfolk,VA", + "id": "104167482" + }, + { + "name": "anchorage,AK", + "id": "101830238" + }, + { + "name": "chula vista,CA", + "id": "101748722" + }, + { + "name": "fayetteville,NC", + "id": "105388224" + }, + { + "name": "lakeland,FL", + "id": "105946785" + }, + { + "name": "san bernardino,CA", + "id": "100493522" + }, + { + "name": "boise,ID", + "id": "102381687" + }, + { + "name": "corona,CA", + "id": "105653395" + }, + { + "name": "sarasota,FL", + "id": "101696514" + }, + { + "name": "paradise,NV", + "id": "103184336" + }, + { + "name": "north hempstead,NY", + "id": "101079574" + }, + { + "name": "syracuse,NY", + "id": "103650796" + }, + { + "name": "savannah,GA", + "id": "104139580" + }, + { + "name": "lawrenceville,GA", + "id": "102112377" + }, + { + "name": "kissimmee,FL", + "id": "103000454" + }, + { + "name": "shreveport,LA", + "id": "106758579" + }, + { + "name": "garland,TX", + "id": "106010780" + }, + { + "name": "trenton,NJ", + "id": "106456622" + }, + { + "name": "fort myers,FL", + "id": "104948205" + }, + { + "name": "chesapeake,VA", + "id": "106976053" + }, + { + "name": "fontana,CA", + "id": "107125851" + }, + { + "name": "amarillo,TX", + "id": "101286062" + }, + { + "name": "springfield,MO", + "id": "104102413" + }, + { + "name": "irving,TX", + "id": "102387510" + }, + { + "name": "bradenton,FL", + "id": "104210745" + }, + { + "name": "lake worth,FL", + "id": "103594971" + }, + { + "name": "wilmington,DE", + "id": "105138576" + }, + { + "name": "boston,MA", + "id": "102380872" + }, + { + "name": "montgomery,AL", + "id": "105392391" + }, + { + "name": "north las vegas,NV", + "id": "102801014" + }, + { + "name": "little rock,AR", + "id": "104302746" + }, + { + "name": "fremont,CA", + "id": "106138232" + }, + { + "name": "irvine,CA", + "id": "103575230" + }, + { + "name": "des moines,IA", + "id": "105056705" + }, + { + "name": "oxnard,CA", + "id": "102866614" + }, + { + "name": "gilbert,AZ", + "id": "104278762" + }, + { + "name": "boca raton,FL", + "id": "103462227" + }, + { + "name": "ocala,FL", + "id": "101456977" + }, + { + "name": "aurora,IL", + "id": "104382747" + }, + { + "name": "providence,RI", + "id": "106094923" + }, + { + "name": "augusta,GA", + "id": "108959117" + }, + { + "name": "chattanooga,TN", + "id": "100380745" + }, + { + "name": "gainesville,FL", + "id": "101080674" + }, + { + "name": "brownsville,TX", + "id": "102713810" + }, + { + "name": "spring valley,NV", + "id": "104947139" + }, + { + "name": "moreno valley,CA", + "id": "102498537" + }, + { + "name": "sunrise manor,NV", + "id": "100115532" + }, + { + "name": "eugene,OR", + "id": "101560118" + }, + { + "name": "huntington beach,CA", + "id": "106683099" + }, + { + "name": "huntsville,AL", + "id": "105142920" + }, + { + "name": "whittier,CA", + "id": "106764122" + }, + { + "name": "port st. lucie,FL", + "id": "106921907" + }, + { + "name": "rockford,IL", + "id": "106717984" + }, + { + "name": "oceanside,CA", + "id": "104648452" + }, + { + "name": "erie,PA", + "id": "103427991" + }, + { + "name": "ogden,UT", + "id": "105219479" + }, + { + "name": "wilmington,NC", + "id": "105138576" + }, + { + "name": "woodbridge,VA", + "id": "102882984" + }, + { + "name": "beaverton,OR", + "id": "105599308" + }, + { + "name": "evansville,IN", + "id": "103041884" + }, + { + "name": "lancaster,CA", + "id": "102139694" + }, + { + "name": "salinas,CA", + "id": "102927973" + }, + { + "name": "worcester,MA", + "id": "104689951" + }, + { + "name": "newport news,VA", + "id": "100789246" + }, + { + "name": "green bay,WI", + "id": "105135127" + }, + { + "name": "yonkers,NY", + "id": "100076283" + }, + { + "name": "hayward,CA", + "id": "104146296" + }, + { + "name": "fort collins,CO", + "id": "104469362" + }, + { + "name": "york,PA", + "id": "103985207" + }, + { + "name": "jackson,MS", + "id": "100853037" + }, + { + "name": "grand prairie,TX", + "id": "105824153" + }, + { + "name": "torrance,CA", + "id": "100472399" + }, + { + "name": "columbus,GA", + "id": "102812094" + }, + { + "name": "glendale,CA", + "id": "105854906" + }, + { + "name": "garden grove,CA", + "id": "101479894" + }, + { + "name": "clearwater,FL", + "id": "103147193" + }, + { + "name": "escondido,CA", + "id": "105894845" + }, + { + "name": "palmdale,CA", + "id": "103726782" + }, + { + "name": "overland park,KS", + "id": "104027599" + }, + { + "name": "decatur,GA", + "id": "105420557" + }, + { + "name": "new haven,CT", + "id": "101911097" + }, + { + "name": "tempe,AZ", + "id": "101526294" + }, + { + "name": "mission,TX", + "id": "106574339" + }, + { + "name": "olympia,WA", + "id": "104651760" + }, + { + "name": "ontario,CA", + "id": "104039150" + }, + { + "name": "allentown,PA", + "id": "106588646" + }, + { + "name": "macon,GA", + "id": "114700719" + }, + { + "name": "roanoke,VA", + "id": "102318275" + }, + { + "name": "alpharetta,GA", + "id": "105409290" + }, + { + "name": "harrisburg,PA", + "id": "101287892" + }, + { + "name": "rancho cucamonga,CA", + "id": "105128225" + }, + { + "name": "el cajon,CA", + "id": "104918729" + }, + { + "name": "topeka,KS", + "id": "104003859" + }, + { + "name": "youngstown,OH", + "id": "103411473" + }, + { + "name": "flint,MI", + "id": "102916975" + }, + { + "name": "elk grove,CA", + "id": "107065252" + }, + { + "name": "schenectady,NY", + "id": "103556916" + }, + { + "name": "lansing,MI", + "id": "101377237" + }, + { + "name": "sioux falls,SD", + "id": "103695844" + }, + { + "name": "naperville,IL", + "id": "104455108" + }, + { + "name": "kalamazoo,MI", + "id": "101264055" + }, + { + "name": "murfreesboro,TN", + "id": "102069526" + }, + { + "name": "ann arbor,MI", + "id": "102965250" + }, + { + "name": "peoria,AZ", + "id": "102431848" + }, + { + "name": "cape coral,FL", + "id": "102314158" + }, + { + "name": "fredericksburg,VA", + "id": "100599804" + }, + { + "name": "everett,WA", + "id": "107024810" + }, + { + "name": "boynton beach,FL", + "id": "107017969" + }, + { + "name": "homestead,FL", + "id": "102889894" + }, + { + "name": "lakewood,CO", + "id": "101077878" + }, + { + "name": "pomona,CA", + "id": "104304551" + }, + { + "name": "north township,IN", + "id": "102603396" + }, + { + "name": "pasadena,CA", + "id": "105252199" + }, + { + "name": "fairfax,VA", + "id": "105449217" + }, + { + "name": "pueblo,CO", + "id": "101391017" + }, + { + "name": "clarksville,TN", + "id": "100097788" + }, + { + "name": "lafayette,LA", + "id": "106447438" + }, + { + "name": "albany,NY", + "id": "100074394" + }, + { + "name": "hyattsville,MD", + "id": "106800256" + }, + { + "name": "kansas city,KS", + "id": "106142749" + }, + { + "name": "sugar land,TX", + "id": "104899889" + }, + { + "name": "waco,TX", + "id": "103659815" + }, + { + "name": "kent,WA", + "id": "103463944" + }, + { + "name": "canton,OH", + "id": "102325347" + }, + { + "name": "paterson,NJ", + "id": "102390836" + }, + { + "name": "pasadena,TX", + "id": "105252199" + }, + { + "name": "south bend,IN", + "id": "106215234" + }, + { + "name": "mckinney,TX", + "id": "105896187" + }, + { + "name": "bridgeport,CT", + "id": "100199330" + }, + { + "name": "tyler,TX", + "id": "105879823" + }, + { + "name": "springfield,MA", + "id": "100632918" + }, + { + "name": "las cruces,NM", + "id": "104022305" + }, + { + "name": "beaumont,TX", + "id": "102249749" + }, + { + "name": "visalia,CA", + "id": "105921085" + }, + { + "name": "springfield,IL", + "id": "105843014" + }, + { + "name": "yuma,AZ", + "id": "104292855" + }, + { + "name": "edmond,OK", + "id": "105204418" + }, + { + "name": "orange,CA", + "id": "103539511" + }, + { + "name": "renton,WA", + "id": "103317020" + }, + { + "name": "mesquite,TX", + "id": "106133730" + }, + { + "name": "sunnyvale,CA", + "id": "104119503" + }, + { + "name": "miramar,FL", + "id": "103113317" + }, + { + "name": "melbourne,FL", + "id": "106033654" + }, + { + "name": "denton,TX", + "id": "102283463" + }, + { + "name": "round rock,TX", + "id": "105523803" + }, + { + "name": "columbia,MO", + "id": "105883107" + }, + { + "name": "newark,DE", + "id": "102543534" + }, + { + "name": "manassas,VA", + "id": "106481878" + }, + { + "name": "odessa,TX", + "id": "102130787" + }, + { + "name": "metairie,LA", + "id": "106681486" + }, + { + "name": "ramapo,NY", + "id": "101322201" + }, + { + "name": "peoria,IL", + "id": "101692508" + }, + { + "name": "saginaw,MI", + "id": "101474323" + }, + { + "name": "gaithersburg,MD", + "id": "105229445" + }, + { + "name": "inglewood,CA", + "id": "106099034" + }, + { + "name": "compton,CA", + "id": "101031668" + }, + { + "name": "fullerton,CA", + "id": "101192951" + }, + { + "name": "saint charles,MO", + "id": "101808229" + }, + { + "name": "hamilton,OH", + "id": "101688499" + }, + { + "name": "midland,TX", + "id": "104061736" + }, + { + "name": "cedar rapids,IA", + "id": "106585497" + }, + { + "name": "killeen,TX", + "id": "100268144" + }, + { + "name": "warren,MI", + "id": "106646796" + }, + { + "name": "santa maria,CA", + "id": "104912340" + }, + { + "name": "santa barbara,CA", + "id": "106474520" + }, + { + "name": "san mateo,CA", + "id": "100472083" + }, + { + "name": "mcallen,TX", + "id": "101984162" + }, + { + "name": "cary,NC", + "id": "104987206" + }, + { + "name": "bellevue,WA", + "id": "106619589" + }, + { + "name": "humble,TX", + "id": "101910282" + }, + { + "name": "rockville,MD", + "id": "100249151" + }, + { + "name": "victorville,CA", + "id": "104271920" + }, + { + "name": "conroe,TX", + "id": "105119609" + }, + { + "name": "olathe,KS", + "id": "106874392" + }, + { + "name": "west valley city,UT", + "id": "104252509" + }, + { + "name": "sterling heights,MI", + "id": "103294075" + }, + { + "name": "cumming,GA", + "id": "102661238" + }, + { + "name": "puyallup,WA", + "id": "101585368" + }, + { + "name": "muskegon,MI", + "id": "106856305" + }, + { + "name": "joliet,IL", + "id": "106801253" + }, + { + "name": "billings,MT", + "id": "104158430" + }, + { + "name": "racine,WI", + "id": "100566345" + }, + { + "name": "bloomington,IN", + "id": "106579831" + }, + { + "name": "simi valley,CA", + "id": "100581909" + }, + { + "name": "yakima,WA", + "id": "102629614" + }, + { + "name": "lake charles,LA", + "id": "105398665" + }, + { + "name": "abilene,TX", + "id": "101330561" + }, + { + "name": "hartford,CT", + "id": "101325776" + }, + { + "name": "concord,CA", + "id": "103507657" + }, + { + "name": "independence,MO", + "id": "100343577" + }, + { + "name": "bethlehem,PA", + "id": "105750544" + }, + { + "name": "roseville,CA", + "id": "102254190" + }, + { + "name": "cypress,TX", + "id": "100917653" + }, + { + "name": "stamford,CT", + "id": "106935132" + }, + { + "name": "toms river,NJ", + "id": "101435856" + }, + { + "name": "arvada,CO", + "id": "106357276" + }, + { + "name": "boulder,CO", + "id": "102597912" + }, + { + "name": "gainesville,GA", + "id": "101080674" + }, + { + "name": "asheville,NC", + "id": "101772619" + }, + { + "name": "frederick,MD", + "id": "106194195" + }, + { + "name": "frisco,TX", + "id": "105918502" + }, + { + "name": "surprise,AZ", + "id": "104385764" + }, + { + "name": "carrollton,TX", + "id": "106067336" + }, + { + "name": "myrtle beach,SC", + "id": "102898581" + }, + { + "name": "vallejo,CA", + "id": "105844446" + }, + { + "name": "berkeley,CA", + "id": "104481114" + }, + { + "name": "panama city,FL", + "id": "102886511" + }, + { + "name": "rochester,MN", + "id": "106553046" + }, + { + "name": "hemet,CA", + "id": "100090052" + }, + { + "name": "bellingham,WA", + "id": "105915368" + }, + { + "name": "broken arrow,OK", + "id": "100561768" + }, + { + "name": "longmont,CO", + "id": "105777453" + }, + { + "name": "elgin,IL", + "id": "104954196" + }, + { + "name": "duluth,MN", + "id": "105972365" + }, + { + "name": "appleton,WI", + "id": "104702338" + }, + { + "name": "falls church,VA", + "id": "100189931" + }, + { + "name": "high point,NC", + "id": "105916010" + }, + { + "name": "murrieta,CA", + "id": "106495243" + }, + { + "name": "santa clara,CA", + "id": "100075706" + }, + { + "name": "la puente,CA", + "id": "103485428" + }, + { + "name": "spring hill,FL", + "id": "105353499" + }, + { + "name": "new port richey,FL", + "id": "104659232" + }, + { + "name": "temecula,CA", + "id": "105644570" + }, + { + "name": "norman,OK", + "id": "100716176" + }, + { + "name": "west jordan,UT", + "id": "103058555" + }, + { + "name": "provo,UT", + "id": "106484743" + }, + { + "name": "kenosha,WI", + "id": "102684081" + }, + { + "name": "tuscaloosa,AL", + "id": "104024166" + }, + { + "name": "ventura,CA", + "id": "102044560" + }, + { + "name": "pearland,TX", + "id": "103960194" + }, + { + "name": "charlottesville,VA", + "id": "106669259" + }, + { + "name": "stone mountain,GA", + "id": "102771588" + }, + { + "name": "vista,CA", + "id": "100097790" + }, + { + "name": "downey,CA", + "id": "105599965" + }, + { + "name": "redding,CA", + "id": "106403603" + }, + { + "name": "costa mesa,CA", + "id": "105846586" + }, + { + "name": "waterbury,CT", + "id": "101969750" + }, + { + "name": "centennial,CO", + "id": "105173497" + }, + { + "name": "sparks,NV", + "id": "100799732" + }, + { + "name": "st. augustine,FL", + "id": "101620966" + }, + { + "name": "clovis,CA", + "id": "102923073" + }, + { + "name": "miami beach,FL", + "id": "102844048" + }, + { + "name": "lewisville,TX", + "id": "103865263" + }, + { + "name": "florissant,MO", + "id": "102763674" + }, + { + "name": "greenville,NC", + "id": "102419620" + }, + { + "name": "west covina,CA", + "id": "106144772" + }, + { + "name": "fargo,ND", + "id": "102300628" + }, + { + "name": "daly city,CA", + "id": "101421547" + }, + { + "name": "rock hill,SC", + "id": "104527030" + }, + { + "name": "delray beach,FL", + "id": "100420651" + }, + { + "name": "spartanburg,SC", + "id": "102621922" + }, + { + "name": "bothell,WA", + "id": "105284588" + }, + { + "name": "burbank,CA", + "id": "106549262" + }, + { + "name": "sandy springs,GA", + "id": "100955123" + }, + { + "name": "lowell,MA", + "id": "104107141" + }, + { + "name": "tracy,CA", + "id": "100762562" + }, + { + "name": "perris,CA", + "id": "106810187" + }, + { + "name": "chico,CA", + "id": "103958324" + }, + { + "name": "jurupa valley,CA", + "id": "103562079" + }, + { + "name": "fairfield,CA", + "id": "103278777" + }, + { + "name": "vero beach,FL", + "id": "106083832" + }, + { + "name": "sandy,UT", + "id": "103644525" + }, + { + "name": "norwalk,CA", + "id": "100317406" + }, + { + "name": "bend,OR", + "id": "101147714" + }, + { + "name": "cambridge,MA", + "id": "104597301" + }, + { + "name": "douglasville,GA", + "id": "101625094" + }, + { + "name": "carlsbad,CA", + "id": "104116056" + }, + { + "name": "davie,FL", + "id": "106339951" + }, + { + "name": "college station,TX", + "id": "103723584" + }, + { + "name": "palm bay,FL", + "id": "100177841" + }, + { + "name": "san leandro,CA", + "id": "101501634" + }, + { + "name": "concord,NC", + "id": "103507657" + }, + { + "name": "nampa,ID", + "id": "100748018" + }, + { + "name": "redwood city,CA", + "id": "107180219" + }, + { + "name": "springfield,VA", + "id": "104102413" + }, + { + "name": "bowling green,KY", + "id": "107023414" + }, + { + "name": "jackson,MI", + "id": "100853037" + }, + { + "name": "duluth,GA", + "id": "105972365" + }, + { + "name": "san angelo,TX", + "id": "105687936" + }, + { + "name": "largo,FL", + "id": "101117030" + }, + { + "name": "auburn,WA", + "id": "102450630" + }, + { + "name": "baytown,TX", + "id": "104504952" + }, + { + "name": "grand junction,CO", + "id": "103610874" + }, + { + "name": "antioch,CA", + "id": "106181710" + }, + { + "name": "gastonia,NC", + "id": "101760514" + }, + { + "name": "davenport,IA", + "id": "100918937" + }, + { + "name": "longview,TX", + "id": "103772536" + }, + { + "name": "laurel,MD", + "id": "106998201" + }, + { + "name": "wichita falls,TX", + "id": "102927921" + }, + { + "name": "springfield,OH", + "id": "100366443" + }, + { + "name": "idaho falls,ID", + "id": "101494361" + }, + { + "name": "albany,GA", + "id": "100074394" + }, + { + "name": "rialto,CA", + "id": "105946568" + }, + { + "name": "greeley,CO", + "id": "104578578" + }, + { + "name": "bell gardens,CA", + "id": "100072312" + }, + { + "name": "scranton,PA", + "id": "103507055" + }, + { + "name": "edison,NJ", + "id": "102003310" + }, + { + "name": "englewood,CO", + "id": "102909269" + }, + { + "name": "hesperia,CA", + "id": "101433035" + }, + { + "name": "vacaville,CA", + "id": "103065994" + }, + { + "name": "daytona beach,FL", + "id": "104779438" + }, + { + "name": "richardson,TX", + "id": "102164100" + }, + { + "name": "ypsilanti,MI", + "id": "105190062" + }, + { + "name": "lynchburg,VA", + "id": "105936994" + }, + { + "name": "madera,CA", + "id": "101406403" + }, + { + "name": "waukegan,IL", + "id": "103285405" + }, + { + "name": "arden-arcade,CA", + "id": "108084584" + }, + { + "name": "elizabeth,NJ", + "id": "104952875" + }, + { + "name": "dearborn,MI", + "id": "105053977" + }, + { + "name": "lafayette,IN", + "id": "102260538" + }, + { + "name": "santa cruz,CA", + "id": "101627389" + }, + { + "name": "spokane valley,WA", + "id": "102448241" + }, + { + "name": "merced,CA", + "id": "100338711" + }, + { + "name": "richmond,TX", + "id": "102114334" + }, + { + "name": "elkhart,IN", + "id": "100038974" + }, + { + "name": "rochester,MI", + "id": "102971931" + }, + { + "name": "livonia,MI", + "id": "102347297" + }, + { + "name": "lee's summit,MO", + "id": "101326518" + }, + { + "name": "charleston,WV", + "id": "107200988" + }, + { + "name": "winter haven,FL", + "id": "105589079" + }, + { + "name": "belleville,IL", + "id": "102832203" + }, + { + "name": "roswell,GA", + "id": "105654928" + }, + { + "name": "terre haute,IN", + "id": "102382641" + }, + { + "name": "bowie,MD", + "id": "103203402" + }, + { + "name": "hagerstown,MD", + "id": "102209407" + }, + { + "name": "columbia,MD", + "id": "105682819" + }, + { + "name": "keller,TX", + "id": "100065929" + }, + { + "name": "waukesha,WI", + "id": "104879916" + }, + { + "name": "new bedford,MA", + "id": "104112659" + }, + { + "name": "mission viejo,CA", + "id": "105138537" + }, + { + "name": "plainfield,IL", + "id": "103869364" + }, + { + "name": "south gate,CA", + "id": "105928078" + }, + { + "name": "sunrise,FL", + "id": "104430228" + }, + { + "name": "lawrence,KS", + "id": "105039158" + }, + { + "name": "napa,CA", + "id": "100341601" + }, + { + "name": "muncie,IN", + "id": "104078974" + }, + { + "name": "brockton,MA", + "id": "101837045" + }, + { + "name": "valdosta,GA", + "id": "100347068" + }, + { + "name": "anderson,SC", + "id": "102115537" + }, + { + "name": "lawton,OK", + "id": "105726670" + }, + { + "name": "allen,TX", + "id": "103312651" + }, + { + "name": "hawthorne,CA", + "id": "103708877" + }, + { + "name": "mansfield,OH", + "id": "103629915" + }, + { + "name": "hillsboro,OR", + "id": "101002866" + }, + { + "name": "lynnwood,WA", + "id": "102164689" + }, + { + "name": "lakewood township,NJ", + "id": "102340287" + }, + { + "name": "kennesaw,GA", + "id": "105480811" + }, + { + "name": "ballwin,MO", + "id": "101495759" + }, + { + "name": "palo alto,CA", + "id": "101876708" + }, + { + "name": "broomfield,CO", + "id": "101568126" + }, + { + "name": "quincy,MA", + "id": "105679612" + }, + { + "name": "battle creek,MI", + "id": "101506001" + }, + { + "name": "annapolis,MD", + "id": "104845239" + }, + { + "name": "johnson city,TN", + "id": "102440994" + }, + { + "name": "santa monica,CA", + "id": "103176146" + }, + { + "name": "jacksonville,NC", + "id": "100868799" + }, + { + "name": "acworth,GA", + "id": "101929532" + }, + { + "name": "missouri city,TX", + "id": "105306268" + }, + { + "name": "federal way,WA", + "id": "100960995" + }, + { + "name": "chapel hill,NC", + "id": "102229593" + }, + { + "name": "greenburgh,NY", + "id": "104272040" + }, + { + "name": "missoula,MT", + "id": "103023809" + }, + { + "name": "el monte,CA", + "id": "103245044" + }, + { + "name": "mcdonough,GA", + "id": "104977406" + }, + { + "name": "franklin,TN", + "id": "100822873" + }, + { + "name": "rapid city,SD", + "id": "101392951" + }, + { + "name": "lynn,MA", + "id": "103236430" + }, + { + "name": "lithonia,GA", + "id": "104116784" + }, + { + "name": "canton,MI", + "id": "106393480" + }, + { + "name": "kennewick,WA", + "id": "104041124" + }, + { + "name": "fort smith,AR", + "id": "102509050" + }, + { + "name": "des plaines,IL", + "id": "102665576" + }, + { + "name": "westminster,CA", + "id": "105780205" + }, + { + "name": "holland,MI", + "id": "105617297" + }, + { + "name": "jupiter,FL", + "id": "100638551" + }, + { + "name": "o'fallon,MO", + "id": "105122862" + }, + { + "name": "fayetteville,AR", + "id": "105633049" + }, + { + "name": "bryan,TX", + "id": "106621743" + }, + { + "name": "san marcos,CA", + "id": "101031662" + }, + { + "name": "woodstock,GA", + "id": "103587714" + }, + { + "name": "fort pierce,FL", + "id": "101024390" + }, + { + "name": "rio rancho,NM", + "id": "105787306" + }, + { + "name": "cleveland,TN", + "id": "102356711" + }, + { + "name": "champaign,IL", + "id": "104471618" + }, + { + "name": "deltona,FL", + "id": "104231779" + }, + { + "name": "orem,UT", + "id": "100684843" + }, + { + "name": "midlothian,VA", + "id": "106001926" + }, + { + "name": "fall river,MA", + "id": "103857912" + }, + { + "name": "apopka,FL", + "id": "101581975" + }, + { + "name": "farmington,MI", + "id": "103689979" + }, + { + "name": "decatur,IL", + "id": "106469127" + }, + { + "name": "morgantown,WV", + "id": "101334736" + }, + { + "name": "indio,CA", + "id": "104910063" + }, + { + "name": "harlingen,TX", + "id": "101433488" + }, + { + "name": "kingsport,TN", + "id": "105341958" + }, + { + "name": "herndon,VA", + "id": "104624893" + }, + { + "name": "saint joseph,MO", + "id": "102418149" + }, + { + "name": "lima,OH", + "id": "102972275" + }, + { + "name": "poughkeepsie,NY", + "id": "109277107" + }, + { + "name": "monroe,LA", + "id": "100037994" + }, + { + "name": "medford,OR", + "id": "103970649" + }, + { + "name": "nashua,NH", + "id": "106489956" + }, + { + "name": "jackson,TN", + "id": "100853037" + }, + { + "name": "clarkstown,NY", + "id": "106312599" + }, + { + "name": "cheyenne,WY", + "id": "103451076" + }, + { + "name": "covington,GA", + "id": "102999245" + }, + { + "name": "norristown,PA", + "id": "101237217" + }, + { + "name": "westerville,OH", + "id": "104002166" + }, + { + "name": "sioux city,IA", + "id": "102757216" + }, + { + "name": "bloomington,MN", + "id": "106579831" + }, + { + "name": "upper marlboro,MD", + "id": "103868754" + }, + { + "name": "springdale,AR", + "id": "105622005" + }, + { + "name": "riverview,FL", + "id": "100058739" + }, + { + "name": "norwalk,CT", + "id": "106321469" + }, + { + "name": "williamsburg,VA", + "id": "105254702" + }, + { + "name": "gary,IN", + "id": "105644330" + }, + { + "name": "buford,GA", + "id": "104789802" + }, + { + "name": "slidell,LA", + "id": "100340805" + }, + { + "name": "hoover,AL", + "id": "102104135" + }, + { + "name": "lehigh acres,FL", + "id": "103246512" + }, + { + "name": "parker,CO", + "id": "102328257" + }, + { + "name": "bremerton,WA", + "id": "102105473" + }, + { + "name": "newnan,GA", + "id": "107111363" + }, + { + "name": "pittsburg,CA", + "id": "106400089" + }, + { + "name": "cicero,IL", + "id": "101570870" + }, + { + "name": "suffolk,VA", + "id": "100520660" + }, + { + "name": "flagstaff,AZ", + "id": "103868254" + }, + { + "name": "johns creek,GA", + "id": "103429133" + }, + { + "name": "bloomington,IL", + "id": "103755962" + }, + { + "name": "westland,MI", + "id": "106967839" + }, + { + "name": "meridian,ID", + "id": "103623513" + }, + { + "name": "gardena,CA", + "id": "104728272" + }, + { + "name": "citrus heights,CA", + "id": "100808801" + }, + { + "name": "sumter,SC", + "id": "104416528" + }, + { + "name": "livermore,CA", + "id": "105684438" + }, + { + "name": "chino,CA", + "id": "102784667" + }, + { + "name": "easton,PA", + "id": "100937004" + }, + { + "name": "conyers,GA", + "id": "103102070" + }, + { + "name": "clifton,NJ", + "id": "104250152" + }, + { + "name": "gulfport,MS", + "id": "100960260" + }, + { + "name": "houma,LA", + "id": "104745515" + }, + { + "name": "milton,FL", + "id": "106542704" + }, + { + "name": "carson,CA", + "id": "104495945" + }, + { + "name": "alhambra,CA", + "id": "105345919" + }, + { + "name": "glen burnie,MD", + "id": "102607325" + }, + { + "name": "germantown,MD", + "id": "101315878" + }, + { + "name": "loveland,CO", + "id": "100020699" + }, + { + "name": "hattiesburg,MS", + "id": "101011598" + }, + { + "name": "walnut creek,CA", + "id": "103627077" + }, + { + "name": "watsonville,CA", + "id": "100323944" + }, + { + "name": "ashburn,VA", + "id": "105458072" + }, + { + "name": "binghamton,NY", + "id": "114386774" + }, + { + "name": "clermont,FL", + "id": "101313574" + }, + { + "name": "maryville,TN", + "id": "105420299" + }, + { + "name": "owensboro,KY", + "id": "106845180" + }, + { + "name": "jonesboro,GA", + "id": "104815516" + }, + { + "name": "warner robins,GA", + "id": "106041429" + }, + { + "name": "eau claire,WI", + "id": "105162161" + }, + { + "name": "farmington hills,MI", + "id": "104093580" + }, + { + "name": "troy,MI", + "id": "104041729" + }, + { + "name": "danbury,CT", + "id": "101065316" + }, + { + "name": "tustin,CA", + "id": "103298276" + }, + { + "name": "oshkosh,WI", + "id": "111241134" + }, + { + "name": "brooklyn park,MN", + "id": "106864672" + }, + { + "name": "dalton,GA", + "id": "103176165" + }, + { + "name": "dothan,AL", + "id": "100899453" + }, + { + "name": "ellicott city,MD", + "id": "107014250" + }, + { + "name": "buena park,CA", + "id": "104061197" + }, + { + "name": "bethesda,MD", + "id": "102994144" + }, + { + "name": "arlington heights,IL", + "id": "103913534" + }, + { + "name": "lakewood,CA", + "id": "100599196" + }, + { + "name": "sterling,VA", + "id": "103715657" + }, + { + "name": "brandon,FL", + "id": "103052248" + }, + { + "name": "macomb township,MI", + "id": "104804249" + }, + { + "name": "palm coast,FL", + "id": "100580179" + }, + { + "name": "dallas,GA", + "id": "104194190" + }, + { + "name": "anderson,IN", + "id": "104889500" + }, + { + "name": "middletown,OH", + "id": "101120551" + }, + { + "name": "parma,OH", + "id": "101177044" + }, + { + "name": "evanston,IL", + "id": "103984459" + }, + { + "name": "hickory,NC", + "id": "101374314" + }, + { + "name": "greenwood,IN", + "id": "102295177" + }, + { + "name": "covina,CA", + "id": "103846327" + }, + { + "name": "new braunfels,TX", + "id": "101966928" + }, + { + "name": "utica,MI", + "id": "104073083" + }, + { + "name": "valparaiso,IN", + "id": "100385918" + }, + { + "name": "thousand oaks,CA", + "id": "101566268" + }, + { + "name": "sanford,FL", + "id": "105135351" + }, + { + "name": "bismarck,ND", + "id": "102737163" + }, + { + "name": "waldorf,MD", + "id": "104275809" + }, + { + "name": "pocatello,ID", + "id": "104810528" + }, + { + "name": "bloomington township,IL", + "id": "100818108" + }, + { + "name": "gresham,OR", + "id": "101777201" + }, + { + "name": "palatine,IL", + "id": "106857024" + }, + { + "name": "camarillo,CA", + "id": "100573561" + }, + { + "name": "iowa city,IA", + "id": "105410114" + }, + { + "name": "apple valley,CA", + "id": "107099397" + }, + { + "name": "warwick,RI", + "id": "103614713" + }, + { + "name": "turlock,CA", + "id": "104337712" + }, + { + "name": "jonesboro,AR", + "id": "104815516" + }, + { + "name": "conway,AR", + "id": "106638997" + }, + { + "name": "redmond,WA", + "id": "104145663" + }, + { + "name": "stafford,VA", + "id": "108588129" + }, + { + "name": "carmel,IN", + "id": "104433150" + }, + { + "name": "upland,CA", + "id": "105884182" + }, + { + "name": "porterville,CA", + "id": "102240217" + }, + { + "name": "lexington,NC", + "id": "114499923" + }, + { + "name": "monroe,NC", + "id": "100037994" + }, + { + "name": "yuba city,CA", + "id": "106728758" + }, + { + "name": "bellflower,CA", + "id": "101381105" + }, + { + "name": "baldwin park,CA", + "id": "101096881" + }, + { + "name": "lawrence,MA", + "id": "106029792" + }, + { + "name": "norcross,GA", + "id": "104347632" + }, + { + "name": "schaumburg,IL", + "id": "106026973" + }, + { + "name": "warren,OH", + "id": "103974414" + }, + { + "name": "southfield,MI", + "id": "106322635" + }, + { + "name": "smyrna,GA", + "id": "102658668" + }, + { + "name": "greer,SC", + "id": "100330403" + }, + { + "name": "redondo beach,CA", + "id": "102513763" + }, + { + "name": "huntington park,CA", + "id": "104761922" + }, + { + "name": "huntington,WV", + "id": "106683099" + }, + { + "name": "st. george,UT", + "id": "102608675" + }, + { + "name": "avondale,AZ", + "id": "105733947" + }, + { + "name": "chino hills,CA", + "id": "103940382" + }, + { + "name": "kirkland,WA", + "id": "103255706" + }, + { + "name": "wilkes-barre,PA", + "id": "105601521" + }, + { + "name": "mountain view,CA", + "id": "102250832" + }, + { + "name": "snellville,GA", + "id": "104435756" + }, + { + "name": "plant city,FL", + "id": "103434353" + }, + { + "name": "st. cloud,MN", + "id": "103709263" + }, + { + "name": "palm harbor,FL", + "id": "104162568" + }, + { + "name": "davis,CA", + "id": "102557838" + }, + { + "name": "simpsonville,SC", + "id": "103394331" + }, + { + "name": "manteca,CA", + "id": "106070069" + }, + { + "name": "rochester hills,MI", + "id": "100298570" + }, + { + "name": "brick,NJ", + "id": "102253011" + }, + { + "name": "springfield,OR", + "id": "101025218" + }, + { + "name": "somerville,MA", + "id": "104180134" + }, + { + "name": "alameda,CA", + "id": "103300978" + }, + { + "name": "redlands,CA", + "id": "100504469" + }, + { + "name": "leesburg,VA", + "id": "103637199" + }, + { + "name": "fishers,IN", + "id": "104387630" + }, + { + "name": "hammond,IN", + "id": "101894608" + }, + { + "name": "brighton,CO", + "id": "102998042" + }, + { + "name": "san ramon,CA", + "id": "102614122" + }, + { + "name": "new rochelle,NY", + "id": "103695990" + }, + { + "name": "bolingbrook,IL", + "id": "102958361" + }, + { + "name": "kokomo,IN", + "id": "103300008" + }, + { + "name": "new britain,CT", + "id": "103752532" + }, + { + "name": "marysville,WA", + "id": "105700352" + }, + { + "name": "temple,TX", + "id": "103364699" + }, + { + "name": "lodi,CA", + "id": "106503158" + }, + { + "name": "mount pleasant,SC", + "id": "101213942" + }, + { + "name": "janesville,WI", + "id": "102398661" + }, + { + "name": "waipahu,HI", + "id": "104585395" + }, + { + "name": "joplin,MO", + "id": "103620424" + }, + { + "name": "opa-locka,FL", + "id": "106348426" + }, + { + "name": "folsom,CA", + "id": "107149401" + }, + { + "name": "great falls,MT", + "id": "105928748" + }, + { + "name": "goldsboro,NC", + "id": "100215486" + }, + { + "name": "oviedo,FL", + "id": "104363348" + }, + { + "name": "rocky mount,NC", + "id": "100318798" + }, + { + "name": "lauderhill,FL", + "id": "104247550" + }, + { + "name": "rogers,AR", + "id": "107130493" + }, + { + "name": "utica,NY", + "id": "104073083" + }, + { + "name": "council bluffs,IA", + "id": "105425344" + }, + { + "name": "pleasanton,CA", + "id": "105538613" + }, + { + "name": "san tan valley,AZ", + "id": "100277898" + }, + { + "name": "north charleston,SC", + "id": "104481391" + }, + { + "name": "orange park,FL", + "id": "106061760" + }, + { + "name": "league city,TX", + "id": "102216463" + }, + { + "name": "waterford township,MI", + "id": "105358094" + }, + { + "name": "plainfield,NJ", + "id": "103869364" + }, + { + "name": "winter park,FL", + "id": "103931464" + }, + { + "name": "bessemer,AL", + "id": "105669244" + }, + { + "name": "pawtucket,RI", + "id": "102402267" + }, + { + "name": "denham springs,LA", + "id": "102427783" + }, + { + "name": "johnstown,PA", + "id": "104713073" + }, + { + "name": "dubuque,IA", + "id": "102325580" + }, + { + "name": "cherry hill,NJ", + "id": "112385851" + }, + { + "name": "troy,NY", + "id": "104041729" + }, + { + "name": "weston,FL", + "id": "107024457" + }, + { + "name": "cranston,RI", + "id": "103966042" + }, + { + "name": "pharr,TX", + "id": "105941691" + }, + { + "name": "petaluma,CA", + "id": "101583363" + }, + { + "name": "san rafael,CA", + "id": "104458657" + }, + { + "name": "winchester,VA", + "id": "103702054" + }, + { + "name": "traverse city,MI", + "id": "105302743" + }, + { + "name": "white plains,NY", + "id": "103138200" + }, + { + "name": "la mesa,CA", + "id": "101397892" + }, + { + "name": "passaic,NJ", + "id": "104780623" + }, + { + "name": "bossier city,LA", + "id": "101164781" + }, + { + "name": "mooresville,NC", + "id": "100846428" + }, + { + "name": "woodbury,MN", + "id": "101591884" + }, + { + "name": "parkville,MD", + "id": "106022602" + }, + { + "name": "lynwood,CA", + "id": "104889949" + }, + { + "name": "tulare,CA", + "id": "101291052" + }, + { + "name": "union city,CA", + "id": "103757091" + }, + { + "name": "aiken,SC", + "id": "105885998" + }, + { + "name": "shawnee,KS", + "id": "103454160" + }, + { + "name": "north little rock,AR", + "id": "103972861" + }, + { + "name": "dover,DE", + "id": "105894331" + }, + { + "name": "statesville,NC", + "id": "100678431" + }, + { + "name": "casper,WY", + "id": "104533488" + }, + { + "name": "dublin,OH", + "id": "102981272" + }, + { + "name": "casas adobes,AZ", + "id": "101961566" + }, + { + "name": "lake elsinore,CA", + "id": "104447278" + }, + { + "name": "pflugerville,TX", + "id": "103987260" + }, + { + "name": "brandon,MS", + "id": "103052248" + }, + { + "name": "port charlotte,FL", + "id": "105516091" + }, + { + "name": "centreville,VA", + "id": "106279809" + }, + { + "name": "jefferson city,MO", + "id": "102914612" + }, + { + "name": "palm beach gardens,FL", + "id": "102758771" + }, + { + "name": "framingham,MA", + "id": "100111223" + }, + { + "name": "kenner,LA", + "id": "105647996" + }, + { + "name": "pasco,WA", + "id": "101162452" + }, + { + "name": "sanford,NC", + "id": "105135351" + }, + { + "name": "tomball,TX", + "id": "101554549" + }, + { + "name": "lorain,OH", + "id": "103349451" + }, + { + "name": "grants pass,OR", + "id": "104818646" + }, + { + "name": "bay city,MI", + "id": "101719264" + }, + { + "name": "layton,UT", + "id": "105858690" + }, + { + "name": "matthews,NC", + "id": "104151576" + }, + { + "name": "la habra,CA", + "id": "102284726" + }, + { + "name": "fort mill,SC", + "id": "101767527" + }, + { + "name": "harrisonburg,VA", + "id": "103273895" + }, + { + "name": "columbus,IN", + "id": "102677672" + }, + { + "name": "castle rock,CO", + "id": "103640389" + }, + { + "name": "spring valley,CA", + "id": "104947139" + }, + { + "name": "mount vernon,NY", + "id": "100137766" + }, + { + "name": "huntington station,NY", + "id": "105473209" + }, + { + "name": "meridian,MS", + "id": "103623513" + }, + { + "name": "gadsden,AL", + "id": "100729925" + }, + { + "name": "mechanicsburg,PA", + "id": "104764460" + }, + { + "name": "goodyear,AZ", + "id": "104204200" + }, + { + "name": "stuart,FL", + "id": "103317667" + }, + { + "name": "west lafayette,IN", + "id": "106112649" + }, + { + "name": "milpitas,CA", + "id": "102068169" + }, + { + "name": "hendersonville,NC", + "id": "102831213" + }, + { + "name": "ithaca,NY", + "id": "109578071" + }, + { + "name": "eagan,MN", + "id": "102764192" + }, + { + "name": "yorba linda,CA", + "id": "101490915" + }, + { + "name": "union city,NJ", + "id": "103757091" + }, + { + "name": "state college,PA", + "id": "101811032" + }, + { + "name": "port orange,FL", + "id": "102252967" + }, + { + "name": "west bloomfield township,MI", + "id": "107781125" + }, + { + "name": "stockbridge,GA", + "id": "105009445" + }, + { + "name": "arcadia,CA", + "id": "107117755" + }, + { + "name": "cocoa,FL", + "id": "105206884" + }, + { + "name": "tamarac,FL", + "id": "105300969" + }, + { + "name": "massillon,OH", + "id": "102722565" + }, + { + "name": "glen allen,VA", + "id": "102126833" + }, + { + "name": "hanford,CA", + "id": "102779648" + }, + { + "name": "portland,ME", + "id": "104727230" + }, + { + "name": "corvallis,OR", + "id": "104571827" + }, + { + "name": "weatherford,TX", + "id": "103485107" + }, + { + "name": "cedar park,TX", + "id": "103543392" + }, + { + "name": "new bern,NC", + "id": "105746521" + }, + { + "name": "tinley park,IL", + "id": "101995828" + }, + { + "name": "cookeville,TN", + "id": "102442346" + }, + { + "name": "decatur,AL", + "id": "103670263" + }, + { + "name": "midland,MI", + "id": "104061736" + }, + { + "name": "flower mound,TX", + "id": "104841635" + }, + { + "name": "orland park,IL", + "id": "106587485" + }, + { + "name": "suwanee,GA", + "id": "107034722" + }, + { + "name": "blaine,MN", + "id": "103923602" + }, + { + "name": "noblesville,IN", + "id": "102263500" + }, + { + "name": "east orange,NJ", + "id": "105312778" + }, + { + "name": "elyria,OH", + "id": "102015952" + }, + { + "name": "punta gorda,FL", + "id": "106005156" + }, + { + "name": "brooksville,FL", + "id": "105055325" + }, + { + "name": "logan,UT", + "id": "106424145" + }, + { + "name": "west hartford,CT", + "id": "103449285" + }, + { + "name": "venice,FL", + "id": "101388401" + }, + { + "name": "levittown,PA", + "id": "100502260" + }, + { + "name": "fairbanks,AK", + "id": "104572344" + }, + { + "name": "south san francisco,CA", + "id": "103380759" + }, + { + "name": "east lansing,MI", + "id": "100550015" + }, + { + "name": "buckeye,AZ", + "id": "106644274" + }, + { + "name": "weslaco,TX", + "id": "105479985" + }, + { + "name": "san marcos,TX", + "id": "107016741" + }, + { + "name": "florence-graham,CA", + "id": "105944759" + }, + { + "name": "bel air,MD", + "id": "102910675" + }, + { + "name": "san clemente,CA", + "id": "106799235" + }, + { + "name": "madison,AL", + "id": "101775764" + }, + { + "name": "mechanicsville,VA", + "id": "101490468" + }, + { + "name": "la crosse,WI", + "id": "102640892" + }, + { + "name": "huntsville,TX", + "id": "105142920" + }, + { + "name": "sheboygan,WI", + "id": "106135880" + }, + { + "name": "san luis obispo,CA", + "id": "101118506" + }, + { + "name": "berwyn,IL", + "id": "103329505" + }, + { + "name": "altoona,PA", + "id": "100737258" + }, + { + "name": "laguna niguel,CA", + "id": "102546906" + }, + { + "name": "vienna,VA", + "id": "101627305" + }, + { + "name": "florence,AL", + "id": "101048704" + }, + { + "name": "eastvale,CA", + "id": "103198239" + }, + { + "name": "taylor,MI", + "id": "101835741" + }, + { + "name": "lufkin,TX", + "id": "101640508" + }, + { + "name": "bayonne,NJ", + "id": "104519072" + }, + { + "name": "newport beach,CA", + "id": "101655944" + }, + { + "name": "pico rivera,CA", + "id": "103073514" + }, + { + "name": "ames,IA", + "id": "103945718" + }, + { + "name": "pottstown,PA", + "id": "103440459" + }, + { + "name": "ewa beach,HI", + "id": "100560011" + }, + { + "name": "coon rapids,MN", + "id": "106138635" + }, + { + "name": "fayetteville,GA", + "id": "105388224" + }, + { + "name": "casa grande,AZ", + "id": "102489641" + }, + { + "name": "montebello,CA", + "id": "104720531" + }, + { + "name": "bay shore,NY", + "id": "101927337" + }, + { + "name": "clearfield,UT", + "id": "102792499" + }, + { + "name": "san gabriel,CA", + "id": "104157961" + }, + { + "name": "north miami,FL", + "id": "102010964" + }, + { + "name": "middletown,NY", + "id": "101120551" + }, + { + "name": "lakewood,WA", + "id": "100801227" + }, + { + "name": "rosemead,CA", + "id": "106575329" + }, + { + "name": "medina,OH", + "id": "102442913" + }, + { + "name": "north port,FL", + "id": "101490065" + }, + { + "name": "griffin,GA", + "id": "105727907" + }, + { + "name": "valrico,FL", + "id": "100233907" + }, + { + "name": "port orchard,WA", + "id": "100036878" + }, + { + "name": "ormond beach,FL", + "id": "101076143" + }, + { + "name": "carson city,NV", + "id": "106978129" + }, + { + "name": "north richland hills,TX", + "id": "107002436" + }, + { + "name": "deland,FL", + "id": "102570379" + }, + { + "name": "moore,OK", + "id": "104723995" + }, + { + "name": "saint cloud,FL", + "id": "103328830" + }, + { + "name": "caldwell,ID", + "id": "106086738" + }, + { + "name": "doral,FL", + "id": "102192838" + }, + { + "name": "conway,SC", + "id": "106638997" + }, + { + "name": "hamden,CT", + "id": "105785565" + }, + { + "name": "valley stream,NY", + "id": "102171049" + }, + { + "name": "biloxi,MS", + "id": "103699215" + }, + { + "name": "meriden,CT", + "id": "103123824" + }, + { + "name": "manhattan,KS", + "id": "105384541" + }, + { + "name": "west new york,NJ", + "id": "101826135" + }, + { + "name": "haverhill,MA", + "id": "101221414" + }, + { + "name": "eden prairie,MN", + "id": "105782582" + }, + { + "name": "north bergen,NJ", + "id": "101489834" + }, + { + "name": "fond du lac,WI", + "id": "105000184" + }, + { + "name": "brentwood,NY", + "id": "100227979" + }, + { + "name": "rancho cordova,CA", + "id": "104863738" + }, + { + "name": "cupertino,CA", + "id": "106758460" + }, + { + "name": "vineland,NJ", + "id": "103033268" + }, + { + "name": "texarkana,TX", + "id": "102103067" + }, + { + "name": "snohomish,WA", + "id": "105384688" + }, + { + "name": "mankato,MN", + "id": "101846878" + }, + { + "name": "waltham,MA", + "id": "101568447" + }, + { + "name": "crown point,IN", + "id": "105122134" + }, + { + "name": "pasadena,MD", + "id": "105252199" + }, + { + "name": "new iberia,LA", + "id": "105023620" + }, + { + "name": "euless,TX", + "id": "106759060" + }, + { + "name": "bristol,CT", + "id": "100299768" + }, + { + "name": "bellevue,NE", + "id": "102457028" + }, + { + "name": "danville,VA", + "id": "102052704" + }, + { + "name": "powder springs,GA", + "id": "104127965" + }, + { + "name": "burnsville,MN", + "id": "106788034" + }, + { + "name": "skokie,IL", + "id": "104075955" + }, + { + "name": "monterey park,CA", + "id": "106955147" + }, + { + "name": "mentor,OH", + "id": "100502186" + }, + { + "name": "millcreek,UT", + "id": "103006833" + }, + { + "name": "cartersville,GA", + "id": "100347246" + }, + { + "name": "easley,SC", + "id": "104878030" + }, + { + "name": "petersburg,VA", + "id": "100833436" + }, + { + "name": "round lake,IL", + "id": "103659837" + }, + { + "name": "south whittier,CA", + "id": "102620922" + }, + { + "name": "taylorsville,UT", + "id": "103780974" + }, + { + "name": "west allis,WI", + "id": "104280455" + }, + { + "name": "pontiac,MI", + "id": "104991134" + }, + { + "name": "wilson,NC", + "id": "104001298" + }, + { + "name": "woodland,CA", + "id": "107169578" + }, + { + "name": "carrollton,GA", + "id": "106067336" + }, + { + "name": "reston,VA", + "id": "104290390" + }, + { + "name": "kendale lakes,FL", + "id": "101699540" + }, + { + "name": "burleson,TX", + "id": "101452452" + }, + { + "name": "phenix city,AL", + "id": "101200025" + }, + { + "name": "azusa,CA", + "id": "106551858" + }, + { + "name": "saint clair shores,MI", + "id": "106796250" + }, + { + "name": "crystal lake,IL", + "id": "105808369" + }, + { + "name": "lilburn,GA", + "id": "101520516" + }, + { + "name": "newark,OH", + "id": "107042221" + }, + { + "name": "lancaster,OH", + "id": "105052252" + }, + { + "name": "malden,MA", + "id": "103166398" + }, + { + "name": "wheaton,IL", + "id": "102077563" + }, + { + "name": "greensburg,PA", + "id": "106622723" + }, + { + "name": "loganville,GA", + "id": "100220604" + }, + { + "name": "carmichael,CA", + "id": "104339825" + }, + { + "name": "hempstead,NY", + "id": "106943085" + }, + { + "name": "hendersonville,TN", + "id": "102831213" + }, + { + "name": "fontainebleau,FL", + "id": "105448137" + }, + { + "name": "new castle,DE", + "id": "103470446" + }, + { + "name": "lake forest,CA", + "id": "106201288" + }, + { + "name": "bartlett,TN", + "id": "104231204" + }, + { + "name": "apex,NC", + "id": "103871299" + }, + { + "name": "spring valley,NY", + "id": "104947139" + }, + { + "name": "the hammocks,FL", + "id": "105277738" + }, + { + "name": "albany,OR", + "id": "105134015" + }, + { + "name": "bozeman,MT", + "id": "104288072" + }, + { + "name": "martinsburg,WV", + "id": "103856641" + }, + { + "name": "wesley chapel,FL", + "id": "106556098" + }, + { + "name": "novato,CA", + "id": "104439723" + }, + { + "name": "grove city,OH", + "id": "105557188" + }, + { + "name": "east saint louis,IL", + "id": "103414016" + }, + { + "name": "tamiami,FL", + "id": "105067092" + }, + { + "name": "chicago heights,IL", + "id": "104396020" + }, + { + "name": "medford,MA", + "id": "106466161" + }, + { + "name": "chillicothe,OH", + "id": "106423490" + }, + { + "name": "farmington,NM", + "id": "103689979" + }, + { + "name": "dearborn heights,MI", + "id": "101047794" + }, + { + "name": "riverdale,GA", + "id": "105571889" + }, + { + "name": "howell,MI", + "id": "105520225" + }, + { + "name": "lake city,FL", + "id": "100681531" + }, + { + "name": "paducah,KY", + "id": "105406589" + }, + { + "name": "golden,CO", + "id": "106895393" + }, + { + "name": "yukon,OK", + "id": "100667382" + }, + { + "name": "lutz,FL", + "id": "102758368" + }, + { + "name": "lompoc,CA", + "id": "102056316" + }, + { + "name": "gilroy,CA", + "id": "100712040" + }, + { + "name": "midwest city,OK", + "id": "103486674" + }, + { + "name": "rocklin,CA", + "id": "102311830" + }, + { + "name": "royal oak,MI", + "id": "105228718" + }, + { + "name": "westminster,MD", + "id": "105780205" + }, + { + "name": "roswell,NM", + "id": "105654928" + }, + { + "name": "oak lawn,IL", + "id": "100148796" + }, + { + "name": "mchenry,IL", + "id": "103138751" + }, + { + "name": "goshen,IN", + "id": "101186281" + }, + { + "name": "new baltimore,MI", + "id": "100670402" + }, + { + "name": "castro valley,CA", + "id": "106391731" + }, + { + "name": "downers grove,IL", + "id": "101682691" + }, + { + "name": "colton,CA", + "id": "101465942" + }, + { + "name": "painesville,OH", + "id": "102692105" + }, + { + "name": "altamonte springs,FL", + "id": "105629525" + }, + { + "name": "marrero,LA", + "id": "105305774" + }, + { + "name": "st. charles,IL", + "id": "104833843" + }, + { + "name": "plymouth,MA", + "id": "101780693" + }, + { + "name": "freehold township,NJ", + "id": "103396290" + }, + { + "name": "butler,PA", + "id": "106598094" + }, + { + "name": "shoreline,WA", + "id": "105773780" + }, + { + "name": "rockwall,TX", + "id": "101365589" + }, + { + "name": "palm desert,CA", + "id": "104084539" + }, + { + "name": "blue springs,MO", + "id": "106883872" + }, + { + "name": "fountain valley,CA", + "id": "107169614" + }, + { + "name": "annandale,VA", + "id": "107591106" + }, + { + "name": "piscataway township,NJ", + "id": "107947409" + }, + { + "name": "mishawaka,IN", + "id": "102017374" + }, + { + "name": "monroe,MI", + "id": "106705322" + } +] \ No newline at end of file diff --git a/src/jobspy/__init__.py b/src/jobspy/__init__.py index 061d73b..3514db9 100644 --- a/src/jobspy/__init__.py +++ b/src/jobspy/__init__.py @@ -5,19 +5,11 @@ from typing import List, Tuple, Optional from .jobs import JobType, Location from .scrapers.indeed import IndeedScraper -from .scrapers.ziprecruiter import ZipRecruiterScraper -from .scrapers.linkedin import LinkedInScraper from .scrapers import ScraperInput, Site, JobResponse, Country -from .scrapers.exceptions import ( - LinkedInException, - IndeedException, - ZipRecruiterException, -) +from .scrapers.exceptions import IndeedException SCRAPER_MAPPING = { - Site.LINKEDIN: LinkedInScraper, Site.INDEED: IndeedScraper, - Site.ZIP_RECRUITER: ZipRecruiterScraper, } @@ -81,16 +73,12 @@ def scrape_jobs( try: scraped_data: JobResponse = scraper.scrape(scraper_input) - except (LinkedInException, IndeedException, ZipRecruiterException) as lie: + except IndeedException as lie: raise lie except Exception as e: # unhandled exceptions - if site == Site.LINKEDIN: - raise LinkedInException() if site == Site.INDEED: raise IndeedException() - if site == Site.ZIP_RECRUITER: - raise ZipRecruiterException() else: raise e return site.value, scraped_data diff --git a/src/jobspy/scrapers/exceptions.py b/src/jobspy/scrapers/exceptions.py index 2eb63b2..f543597 100644 --- a/src/jobspy/scrapers/exceptions.py +++ b/src/jobspy/scrapers/exceptions.py @@ -6,13 +6,5 @@ This module contains the set of Scrapers' exceptions. """ -class LinkedInException(Exception): - """Failed to scrape LinkedIn""" - - class IndeedException(Exception): """Failed to scrape Indeed""" - - -class ZipRecruiterException(Exception): - """Failed to scrape ZipRecruiter""" diff --git a/src/jobspy/scrapers/linkedin/__init__.py b/src/jobspy/scrapers/linkedin/__init__.py deleted file mode 100644 index 8331d36..0000000 --- a/src/jobspy/scrapers/linkedin/__init__.py +++ /dev/null @@ -1,271 +0,0 @@ -""" -jobspy.scrapers.linkedin -~~~~~~~~~~~~~~~~~~~ - -This module contains routines to scrape LinkedIn. -""" -from typing import Optional -from datetime import datetime - -import requests -import time -import re -from requests.exceptions import ProxyError -from concurrent.futures import ThreadPoolExecutor, as_completed -from bs4 import BeautifulSoup -from bs4.element import Tag -from threading import Lock - -from .. import Scraper, ScraperInput, Site -from ..exceptions import LinkedInException -from ...jobs import ( - JobPost, - Location, - JobResponse, - JobType, -) - - -def extract_emails_from_text(text: str) -> Optional[list[str]]: - if not text: - return None - email_regex = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}") - return email_regex.findall(text) - - -class LinkedInScraper(Scraper): - MAX_RETRIES = 3 - DELAY = 10 - - def __init__(self, proxy: Optional[str] = None): - """ - Initializes LinkedInScraper with the LinkedIn job search url - """ - site = Site(Site.LINKEDIN) - self.country = "worldwide" - self.url = "https://www.linkedin.com" - super().__init__(site, proxy=proxy) - - def scrape(self, scraper_input: ScraperInput) -> JobResponse: - """ - Scrapes LinkedIn for jobs with scraper_input criteria - :param scraper_input: - :return: job_response - """ - job_list: list[JobPost] = [] - seen_urls = set() - url_lock = Lock() - page = scraper_input.offset // 25 + 25 if scraper_input.offset else 0 - - def job_type_code(job_type_enum): - mapping = { - JobType.FULL_TIME: "F", - JobType.PART_TIME: "P", - JobType.INTERNSHIP: "I", - JobType.CONTRACT: "C", - JobType.TEMPORARY: "T", - } - - return mapping.get(job_type_enum, "") - - while len(job_list) < scraper_input.results_wanted and page < 1000: - params = { - "keywords": scraper_input.search_term, - "location": scraper_input.location, - "distance": scraper_input.distance, - "f_WT": 2 if scraper_input.is_remote else None, - "f_JT": job_type_code(scraper_input.job_type) - if scraper_input.job_type - else None, - "pageNum": 0, - page: page + scraper_input.offset, - "f_AL": "true" if scraper_input.easy_apply else None, - } - - params = {k: v for k, v in params.items() if v is not None} - - params = {k: v for k, v in params.items() if v is not None} - retries = 0 - while retries < self.MAX_RETRIES: - try: - response = requests.get( - f"{self.url}/jobs-guest/jobs/api/seeMoreJobPostings/search?", - params=params, - allow_redirects=True, - proxies=self.proxy, - timeout=10, - ) - response.raise_for_status() - - break - except requests.HTTPError as e: - if hasattr(e, 'response') and e.response is not None: - if e.response.status_code == 429: - time.sleep(self.DELAY) - retries += 1 - continue - else: - raise LinkedInException(f"bad response status code: {e.response.status_code}") - else: - raise - except ProxyError as e: - raise LinkedInException("bad proxy") - except Exception as e: - raise LinkedInException(str(e)) - else: - # Raise an exception if the maximum number of retries is reached - raise LinkedInException("Max retries reached, failed to get a valid response") - - soup = BeautifulSoup(response.text, "html.parser") - - with ThreadPoolExecutor(max_workers=5) as executor: - futures = [] - for job_card in soup.find_all("div", class_="base-search-card"): - job_url = None - href_tag = job_card.find("a", class_="base-card__full-link") - if href_tag and "href" in href_tag.attrs: - href = href_tag.attrs["href"].split("?")[0] - job_id = href.split("-")[-1] - job_url = f"{self.url}/jobs/view/{job_id}" - - with url_lock: - if job_url in seen_urls: - continue - seen_urls.add(job_url) - - futures.append(executor.submit(self.process_job, job_card, job_url)) - - for future in as_completed(futures): - try: - job_post = future.result() - if job_post: - job_list.append(job_post) - except Exception as e: - raise LinkedInException("Exception occurred while processing jobs") - page += 25 - - job_list = job_list[: scraper_input.results_wanted] - return JobResponse(jobs=job_list) - - def process_job(self, job_card: Tag, job_url: str) -> Optional[JobPost]: - title_tag = job_card.find("span", class_="sr-only") - title = title_tag.get_text(strip=True) if title_tag else "N/A" - - company_tag = job_card.find("h4", class_="base-search-card__subtitle") - company_a_tag = company_tag.find("a") if company_tag else None - company = company_a_tag.get_text(strip=True) if company_a_tag else "N/A" - - metadata_card = job_card.find("div", class_="base-search-card__metadata") - location = self.get_location(metadata_card) - - datetime_tag = metadata_card.find("time", class_="job-search-card__listdate") if metadata_card else None - date_posted = None - if datetime_tag and "datetime" in datetime_tag.attrs: - datetime_str = datetime_tag["datetime"] - try: - date_posted = datetime.strptime(datetime_str, "%Y-%m-%d") - except Exception as e: - date_posted = None - benefits_tag = job_card.find("span", class_="result-benefits__text") - benefits = " ".join(benefits_tag.get_text().split()) if benefits_tag else None - - description, job_type = self.get_job_description(job_url) - - return JobPost( - title=title, - description=description, - company_name=company, - location=location, - date_posted=date_posted, - job_url=job_url, - job_type=job_type, - benefits=benefits, - emails=extract_emails_from_text(description) - ) - - def get_job_description(self, job_page_url: str) -> tuple[None, None] | tuple[ - str | None, tuple[str | None, JobType | None]]: - """ - Retrieves job description by going to the job page url - :param job_page_url: - :return: description or None - """ - try: - response = requests.get(job_page_url, timeout=5, proxies=self.proxy) - response.raise_for_status() - except Exception as e: - return None, None - - soup = BeautifulSoup(response.text, "html.parser") - div_content = soup.find( - "div", class_=lambda x: x and "show-more-less-html__markup" in x - ) - - description = None - if div_content: - description = " ".join(div_content.get_text().split()).strip() - - def get_job_type( - soup_job_type: BeautifulSoup, - ) -> JobType | None: - """ - Gets the job type from job page - :param soup_job_type: - :return: JobType - """ - h3_tag = soup_job_type.find( - "h3", - class_="description__job-criteria-subheader", - string=lambda text: "Employment type" in text, - ) - - employment_type = None - if h3_tag: - employment_type_span = h3_tag.find_next_sibling( - "span", - class_="description__job-criteria-text description__job-criteria-text--criteria", - ) - if employment_type_span: - employment_type = employment_type_span.get_text(strip=True) - employment_type = employment_type.lower() - employment_type = employment_type.replace("-", "") - - return LinkedInScraper.get_enum_from_value(employment_type) - - return description, get_job_type(soup) - - @staticmethod - def get_enum_from_value(value_str): - for job_type in JobType: - if value_str in job_type.value: - return job_type - return None - - def get_location(self, metadata_card: Optional[Tag]) -> Location: - """ - Extracts the location data from the job metadata card. - :param metadata_card - :return: location - """ - location = Location(country=self.country) - if metadata_card is not None: - location_tag = metadata_card.find( - "span", class_="job-search-card__location" - ) - location_string = location_tag.text.strip() if location_tag else "N/A" - parts = location_string.split(", ") - if len(parts) == 2: - city, state = parts - location = Location( - city=city, - state=state, - country=self.country, - ) - - return location - -def extract_emails_from_text(text: str) -> Optional[list[str]]: - if not text: - return None - email_regex = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}") - return email_regex.findall(text) \ No newline at end of file diff --git a/src/jobspy/scrapers/ziprecruiter/__init__.py b/src/jobspy/scrapers/ziprecruiter/__init__.py deleted file mode 100644 index c1a0fee..0000000 --- a/src/jobspy/scrapers/ziprecruiter/__init__.py +++ /dev/null @@ -1,474 +0,0 @@ -""" -jobspy.scrapers.ziprecruiter -~~~~~~~~~~~~~~~~~~~ - -This module contains routines to scrape ZipRecruiter. -""" -import math -import json -import re -from datetime import datetime, date -from typing import Optional, Tuple, Any -from urllib.parse import urlparse, parse_qs, urlunparse - -import tls_client -import requests -from bs4 import BeautifulSoup -from bs4.element import Tag -from concurrent.futures import ThreadPoolExecutor, Future - -from .. import Scraper, ScraperInput, Site -from ..exceptions import ZipRecruiterException -from ...jobs import ( - JobPost, - Compensation, - CompensationInterval, - Location, - JobResponse, - JobType, - Country, -) - -def extract_emails_from_text(text: str) -> Optional[list[str]]: - if not text: - return None - email_regex = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}") - return email_regex.findall(text) - - -class ZipRecruiterScraper(Scraper): - def __init__(self, proxy: Optional[str] = None): - """ - Initializes LinkedInScraper with the ZipRecruiter job search url - """ - site = Site(Site.ZIP_RECRUITER) - self.url = "https://www.ziprecruiter.com" - super().__init__(site, proxy=proxy) - - self.jobs_per_page = 20 - self.seen_urls = set() - self.session = tls_client.Session( - client_identifier="chrome112", random_tls_extension_order=True - ) - - def find_jobs_in_page( - self, scraper_input: ScraperInput, page: int - ) -> list[JobPost]: - """ - Scrapes a page of ZipRecruiter for jobs with scraper_input criteria - :param scraper_input: - :param page: - :return: jobs found on page - """ - job_list: list[JobPost] = [] - try: - response = self.session.get( - f"{self.url}/jobs-search", - headers=ZipRecruiterScraper.headers(), - params=ZipRecruiterScraper.add_params(scraper_input, page), - allow_redirects=True, - proxy=self.proxy, - timeout_seconds=10, - ) - if response.status_code != 200: - raise ZipRecruiterException( - f"bad response status code: {response.status_code}" - ) - except Exception as e: - if "Proxy responded with non 200 code" in str(e): - raise ZipRecruiterException("bad proxy") - raise ZipRecruiterException(str(e)) - else: - soup = BeautifulSoup(response.text, "html.parser") - js_tag = soup.find("script", {"id": "js_variables"}) - - if js_tag: - page_json = json.loads(js_tag.string) - jobs_list = page_json.get("jobList") - if jobs_list: - page_variant = "javascript" - # print('type javascript', len(jobs_list)) - else: - page_variant = "html_2" - jobs_list = soup.find_all("div", {"class": "job_content"}) - # print('type 2 html', len(jobs_list)) - else: - page_variant = "html_1" - jobs_list = soup.find_all("li", {"class": "job-listing"}) - # print('type 1 html', len(jobs_list)) - - with ThreadPoolExecutor(max_workers=10) as executor: - if page_variant == "javascript": - job_results = [ - executor.submit(self.process_job_javascript, job) - for job in jobs_list - ] - elif page_variant == "html_1": - job_results = [ - executor.submit(self.process_job_html_1, job) for job in jobs_list - ] - elif page_variant == "html_2": - job_results = [ - executor.submit(self.process_job_html_2, job) for job in jobs_list - ] - - job_list = [result.result() for result in job_results if result.result()] - return job_list - - def scrape(self, scraper_input: ScraperInput) -> JobResponse: - """ - Scrapes ZipRecruiter for jobs with scraper_input criteria - :param scraper_input: - :return: job_response - """ - start_page = (scraper_input.offset // self.jobs_per_page) + 1 if scraper_input.offset else 1 - #: get first page to initialize session - job_list: list[JobPost] = self.find_jobs_in_page(scraper_input, start_page) - pages_to_process = max( - 3, math.ceil(scraper_input.results_wanted / self.jobs_per_page) - ) - - with ThreadPoolExecutor(max_workers=10) as executor: - futures: list[Future] = [ - executor.submit(self.find_jobs_in_page, scraper_input, page) - for page in range(start_page + 1, start_page + pages_to_process + 2) - ] - - for future in futures: - jobs = future.result() - - job_list += jobs - - job_list = job_list[: scraper_input.results_wanted] - return JobResponse(jobs=job_list) - - def process_job_html_1(self, job: Tag) -> Optional[JobPost]: - """ - Parses a job from the job content tag - :param job: BeautifulSoup Tag for one job post - :return JobPost - TODO this method isnt finished due to not encountering this type of html often - """ - job_url = self.cleanurl(job.find("a", {"class": "job_link"})["href"]) - if job_url in self.seen_urls: - return None - - title = job.find("h2", {"class": "title"}).text - company = job.find("a", {"class": "company_name"}).text.strip() - - description, updated_job_url = self.get_description(job_url) - # job_url = updated_job_url if updated_job_url else job_url - if description is None: - description = job.find("p", {"class": "job_snippet"}).text.strip() - - job_type_element = job.find("li", {"class": "perk_item perk_type"}) - job_type = None - if job_type_element: - job_type_text = ( - job_type_element.text.strip().lower().replace("_", "").replace(" ", "") - ) - job_type = ZipRecruiterScraper.get_job_type_enum(job_type_text) - - date_posted = ZipRecruiterScraper.get_date_posted(job) - - job_post = JobPost( - title=title, - description=description, - company_name=company, - location=ZipRecruiterScraper.get_location(job), - job_type=job_type, - compensation=ZipRecruiterScraper.get_compensation(job), - date_posted=date_posted, - job_url=job_url, - emails=extract_emails_from_text(description), - ) - return job_post - - def process_job_html_2(self, job: Tag) -> Optional[JobPost]: - """ - Parses a job from the job content tag for a second variat of HTML that ZR uses - :param job: BeautifulSoup Tag for one job post - :return JobPost - """ - job_url = self.cleanurl(job.find("a", class_="job_link")["href"]) - title = job.find("h2", class_="title").text - company = job.find("a", class_="company_name").text.strip() - - description, updated_job_url = self.get_description(job_url) - # job_url = updated_job_url if updated_job_url else job_url - if description is None: - description = job.find("p", class_="job_snippet").get_text().strip() - - job_type_text = job.find("li", class_="perk_item perk_type") - job_type = None - if job_type_text: - job_type_text = ( - job_type_text.get_text() - .strip() - .lower() - .replace("-", "") - .replace(" ", "") - ) - job_type = ZipRecruiterScraper.get_job_type_enum(job_type_text) - date_posted = ZipRecruiterScraper.get_date_posted(job) - - job_post = JobPost( - title=title, - description=description, - company_name=company, - location=ZipRecruiterScraper.get_location(job), - job_type=job_type, - compensation=ZipRecruiterScraper.get_compensation(job), - date_posted=date_posted, - job_url=job_url, - ) - return job_post - - def process_job_javascript(self, job: dict) -> JobPost: - title = job.get("Title") - job_url = self.cleanurl(job.get("JobURL")) - - description, updated_job_url = self.get_description(job_url) - # job_url = updated_job_url if updated_job_url else job_url - if description is None: - description = BeautifulSoup( - job.get("Snippet", "").strip(), "html.parser" - ).get_text() - - company = job.get("OrgName") - location = Location( - city=job.get("City"), state=job.get("State"), country=Country.US_CANADA - ) - job_type = ZipRecruiterScraper.get_job_type_enum( - job.get("EmploymentType", "").replace("-", "").lower() - ) - - formatted_salary = job.get("FormattedSalaryShort", "") - salary_parts = formatted_salary.split(" ") - - min_salary_str = salary_parts[0][1:].replace(",", "") - if "." in min_salary_str: - min_amount = int(float(min_salary_str) * 1000) - else: - min_amount = int(min_salary_str.replace("K", "000")) - - if len(salary_parts) >= 3 and salary_parts[2].startswith("$"): - max_salary_str = salary_parts[2][1:].replace(",", "") - if "." in max_salary_str: - max_amount = int(float(max_salary_str) * 1000) - else: - max_amount = int(max_salary_str.replace("K", "000")) - else: - max_amount = 0 - - compensation = Compensation( - interval=CompensationInterval.YEARLY, - min_amount=min_amount, - max_amount=max_amount, - currency="USD/CAD", - ) - save_job_url = job.get("SaveJobURL", "") - posted_time_match = re.search( - r"posted_time=(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z)", save_job_url - ) - if posted_time_match: - date_time_str = posted_time_match.group(1) - date_posted_obj = datetime.strptime(date_time_str, "%Y-%m-%dT%H:%M:%SZ") - date_posted = date_posted_obj.date() - else: - date_posted = date.today() - - return JobPost( - title=title, - description=description, - company_name=company, - location=location, - job_type=job_type, - compensation=compensation, - date_posted=date_posted, - job_url=job_url, - ) - return job_post - - @staticmethod - def get_job_type_enum(job_type_str: str) -> Optional[JobType]: - for job_type in JobType: - if job_type_str in job_type.value: - a = True - return job_type - return None - - def get_description(self, job_page_url: str) -> Tuple[Optional[str], Optional[str]]: - """ - Retrieves job description by going to the job page url - :param job_page_url: - :param session: - :return: description or None, response url - """ - try: - response = requests.get( - job_page_url, - headers=ZipRecruiterScraper.headers(), - allow_redirects=True, - timeout=5, - proxies=self.proxy, - ) - if response.status_code not in range(200, 400): - return None, None - except Exception as e: - return None, None - - html_string = response.content - soup_job = BeautifulSoup(html_string, "html.parser") - - job_description_div = soup_job.find("div", {"class": "job_description"}) - if job_description_div: - return job_description_div.text.strip(), response.url - return None, response.url - - @staticmethod - def add_params(scraper_input, page) -> dict[str, str | Any]: - params = { - "search": scraper_input.search_term, - "location": scraper_input.location, - "page": page, - "form": "jobs-landing", - } - job_type_value = None - if scraper_input.job_type: - if scraper_input.job_type.value == "fulltime": - job_type_value = "full_time" - elif scraper_input.job_type.value == "parttime": - job_type_value = "part_time" - else: - job_type_value = scraper_input.job_type.value - - if job_type_value: - params[ - "refine_by_employment" - ] = f"employment_type:employment_type:{job_type_value}" - - if scraper_input.is_remote: - params["refine_by_location_type"] = "only_remote" - - if scraper_input.distance: - params["radius"] = scraper_input.distance - - return params - - @staticmethod - def get_interval(interval_str: str): - """ - Maps the interval alias to its appropriate CompensationInterval. - :param interval_str - :return: CompensationInterval - """ - interval_alias = {"annually": CompensationInterval.YEARLY} - interval_str = interval_str.lower() - - if interval_str in interval_alias: - return interval_alias[interval_str] - - return CompensationInterval(interval_str) - - @staticmethod - def get_date_posted(job: Tag) -> Optional[datetime.date]: - """ - Extracts the date a job was posted - :param job - :return: date the job was posted or None - """ - button = job.find( - "button", {"class": "action_input save_job zrs_btn_secondary_200"} - ) - if not button: - return None - - url_time = button.get("data-href", "") - url_components = urlparse(url_time) - params = parse_qs(url_components.query) - posted_time_str = params.get("posted_time", [None])[0] - - if posted_time_str: - posted_date = datetime.strptime( - posted_time_str, "%Y-%m-%dT%H:%M:%SZ" - ).date() - return posted_date - - return None - - @staticmethod - def get_compensation(job: Tag) -> Optional[Compensation]: - """ - Parses the compensation tag from the job BeautifulSoup object - :param job - :return: Compensation object or None - """ - pay_element = job.find("li", {"class": "perk_item perk_pay"}) - if pay_element is None: - return None - pay = pay_element.find("div", {"class": "value"}).find("span").text.strip() - - def create_compensation_object(pay_string: str) -> Compensation: - """ - Creates a Compensation object from a pay_string - :param pay_string - :return: compensation - """ - interval = ZipRecruiterScraper.get_interval(pay_string.split()[-1]) - - amounts = [] - for amount in pay_string.split("to"): - amount = amount.replace(",", "").strip("$ ").split(" ")[0] - if "K" in amount: - amount = amount.replace("K", "") - amount = int(float(amount)) * 1000 - else: - amount = int(float(amount)) - amounts.append(amount) - - compensation = Compensation( - interval=interval, - min_amount=min(amounts), - max_amount=max(amounts), - currency="USD/CAD", - ) - - return compensation - - return create_compensation_object(pay) - - @staticmethod - def get_location(job: Tag) -> Location: - """ - Extracts the job location from BeatifulSoup object - :param job: - :return: location - """ - location_link = job.find("a", {"class": "company_location"}) - if location_link is not None: - location_string = location_link.text.strip() - parts = location_string.split(", ") - if len(parts) == 2: - city, state = parts - else: - city, state = None, None - else: - city, state = None, None - return Location(city=city, state=state, country=Country.US_CANADA) - - @staticmethod - def headers() -> dict: - """ - Returns headers needed for requests - :return: dict - Dictionary containing headers - """ - return { - "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36" - } - - @staticmethod - def cleanurl(url): - parsed_url = urlparse(url) - - return urlunparse((parsed_url.scheme, parsed_url.netloc, parsed_url.path, parsed_url.params, '', '')) diff --git a/src/tests/__init__.py b/src/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/tests/test_all.py b/src/tests/test_all.py deleted file mode 100644 index 749be79..0000000 --- a/src/tests/test_all.py +++ /dev/null @@ -1,12 +0,0 @@ -from ..jobspy import scrape_jobs -import pandas as pd - - -def test_all(): - result = scrape_jobs( - site_name=["linkedin", "indeed", "zip_recruiter"], - search_term="software engineer", - results_wanted=5, - ) - - assert isinstance(result, pd.DataFrame) and not result.empty, "Result should be a non-empty DataFrame" diff --git a/src/tests/test_indeed.py b/src/tests/test_indeed.py deleted file mode 100644 index 8c6412e..0000000 --- a/src/tests/test_indeed.py +++ /dev/null @@ -1,10 +0,0 @@ -from ..jobspy import scrape_jobs -import pandas as pd - - -def test_indeed(): - result = scrape_jobs( - site_name="indeed", - search_term="software engineer", - ) - assert isinstance(result, pd.DataFrame) and not result.empty, "Result should be a non-empty DataFrame" diff --git a/src/tests/test_linkedin.py b/src/tests/test_linkedin.py deleted file mode 100644 index 5814134..0000000 --- a/src/tests/test_linkedin.py +++ /dev/null @@ -1,10 +0,0 @@ -from ..jobspy import scrape_jobs -import pandas as pd - - -def test_linkedin(): - result = scrape_jobs( - site_name="linkedin", - search_term="software engineer", - ) - assert isinstance(result, pd.DataFrame) and not result.empty, "Result should be a non-empty DataFrame" diff --git a/src/tests/test_ziprecruiter.py b/src/tests/test_ziprecruiter.py deleted file mode 100644 index f2d2212..0000000 --- a/src/tests/test_ziprecruiter.py +++ /dev/null @@ -1,11 +0,0 @@ -from ..jobspy import scrape_jobs -import pandas as pd - - -def test_ziprecruiter(): - result = scrape_jobs( - site_name="zip_recruiter", - search_term="software engineer", - ) - - assert isinstance(result, pd.DataFrame) and not result.empty, "Result should be a non-empty DataFrame"