diff --git a/README.md b/README.md
index 9049ef5..e798335 100644
--- a/README.md
+++ b/README.md
@@ -69,7 +69,7 @@ Optional
 ├── is_remote (bool)
 ├── full_description (bool): fetches full description for Indeed / LinkedIn (much slower)
 ├── results_wanted (int): number of job results to retrieve for each site specified in 'site_type'
-├── easy_apply (bool): filters for jobs that are hosted on LinkedIn
+├── easy_apply (bool): filters for jobs that are hosted on LinkedIn, Glassdoor
 ├── country_indeed (enum): filters the country on Indeed (see below for correct spelling)
 ├── offset (num): starts the search from an offset (e.g. 25 will start the search from the 25th result)
 ```
diff --git a/poetry.lock b/poetry.lock
index 7020fb2..d573844 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1053,16 +1053,6 @@ files = [
     {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"},
     {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"},
     {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"},
     {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"},
     {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"},
     {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"},
@@ -2270,13 +2260,13 @@ test = ["flake8", "isort", "pytest"]
 
 [[package]]
 name = "tls-client"
-version = "0.2.1"
+version = "1.0"
 description = "Advanced Python HTTP Client."
 optional = false
 python-versions = "*"
 files = [
-    {file = "tls_client-0.2.1-py3-none-any.whl", hash = "sha256:124a710952b979d5e20b4e2b7879b7958d6e48a259d0f5b83101055eb173f0bd"},
-    {file = "tls_client-0.2.1.tar.gz", hash = "sha256:473fb4c671d9d4ca6b818548ab6e955640dd589767bfce520830c5618c2f2e2b"},
+    {file = "tls_client-1.0-py3-none-any.whl", hash = "sha256:f1183f5e18cb31914bd62d11b350a33ea0293ea80fb91d69a3072821dece3e66"},
+    {file = "tls_client-1.0.tar.gz", hash = "sha256:7f6de48ad4a0ef69b72682c76ce604155971e07b4bfb2148a36276194ae3e7a0"},
 ]
 
 [[package]]
@@ -2445,4 +2435,4 @@ files = [
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "f966f3979873eec2c3b13460067f5aa414c69aa8ab5cd3239c1cfa564fcb5deb"
+content-hash = "404a77d78066cbb2ef71015562baf44aa11d12aac29a191c1ccc7758bfda598a"
diff --git a/pyproject.toml b/pyproject.toml
index 4d8e71e..049155d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "python-jobspy"
-version = "1.1.36"
+version = "1.1.37"
 description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
 authors = ["Zachary Hampton ", "Cullen Watson "]
 homepage = "https://github.com/Bunsly/JobSpy"
@@ -13,7 +13,7 @@ packages = [
 [tool.poetry.dependencies]
 python = "^3.10"
 requests = "^2.31.0"
-tls-client = "^0.2.1"
+tls-client = "*"
 beautifulsoup4 = "^4.12.2"
 pandas = "^2.1.0"
 NUMPY = "1.24.2"
diff --git a/src/jobspy/scrapers/glassdoor/__init__.py b/src/jobspy/scrapers/glassdoor/__init__.py
index 69dd3e4..0852c2e 100644
--- a/src/jobspy/scrapers/glassdoor/__init__.py
+++ b/src/jobspy/scrapers/glassdoor/__init__.py
@@ -88,13 +88,14 @@ class GlassdoorScraper(Scraper):
     def process_job(self, job_data):
         """Processes a single job and fetches its description."""
         job_id = job_data["jobview"]["job"]["listingId"]
-        job_url = f'{self.url}/job-listing/?jl={job_id}'
+        job_url = f'{self.url}job-listing/j?jl={job_id}'
         if job_url in self.seen_urls:
             return None
         self.seen_urls.add(job_url)
         job = job_data["jobview"]
         title = job["job"]["jobTitleText"]
         company_name = job["header"]["employerNameFromSearch"]
+        company_id = job_data['jobview']['header']['employer']['id']
         location_name = job["header"].get("locationName", "")
         location_type = job["header"].get("locationType", "")
         age_in_days = job["header"].get("ageInDays")
@@ -115,6 +116,7 @@ class GlassdoorScraper(Scraper):
 
         job_post = JobPost(
             title=title,
+            company_url=f"{self.url}Overview/W-EI_IE{company_id}.htm" if company_id else None,
             company_name=company_name,
             date_posted=date_posted,
             job_url=job_url,
@@ -258,7 +260,7 @@ class GlassdoorScraper(Scraper):
             "operationName": "JobSearchResultsQuery",
             "variables": {
                 "excludeJobListingIds": [],
-                "filterParams": [],
+                "filterParams": [{"filterKey": "applicationType", "values": "1"}] if scraper_input.easy_apply else [],
                 "keyword": scraper_input.search_term,
                 "numJobsToShow": 30,
                 "locationType": location_type,
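For reviewers, a minimal sketch of how the new behavior could be exercised from the caller's side. It assumes the `scrape_jobs` entry point and the `site_name` / `search_term` / `results_wanted` parameters documented elsewhere in the README (only `easy_apply` appears in the hunk above), and that the result is a pandas DataFrame with `title`, `company`, `job_url`, and the newly populated `company_url` columns; column names may differ in your installed version.

```python
# Illustrative only, not part of the patch. Assumes jobspy's scrape_jobs
# entry point and a pandas DataFrame return value as described in the README.
from jobspy import scrape_jobs

jobs = scrape_jobs(
    site_name=["glassdoor"],            # Glassdoor scraper touched by this patch
    search_term="software engineer",
    results_wanted=20,
    # easy_apply now also narrows Glassdoor results via the GraphQL
    # filterParams [{"filterKey": "applicationType", "values": "1"}] shown above
    easy_apply=True,
)

# company_url is newly filled for Glassdoor, built from the employer id
print(jobs[["title", "company", "job_url", "company_url"]].head())
```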