enh(glassdoor): easy apply filter (#92)

pull/95/head^2 v1.1.37
Cullen Watson 2024-02-01 19:42:24 -06:00 committed by GitHub
parent b97c73ffd6
commit bbe46fe3f4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 11 additions and 19 deletions

View File

@ -69,7 +69,7 @@ Optional
├── is_remote (bool)
├── full_description (bool): fetches full description for Indeed / LinkedIn (much slower)
├── results_wanted (int): number of job results to retrieve for each site specified in 'site_type'
├── easy_apply (bool): filters for jobs that are hosted on LinkedIn
├── easy_apply (bool): filters for jobs that are hosted on LinkedIn, Glassdoor
├── country_indeed (enum): filters the country on Indeed (see below for correct spelling)
├── offset (num): starts the search from an offset (e.g. 25 will start the search from the 25th result)
```

18
poetry.lock generated
View File

@ -1053,16 +1053,6 @@ files = [
{file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"},
{file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"},
{file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"},
{file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"},
{file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"},
{file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"},
{file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"},
@ -2270,13 +2260,13 @@ test = ["flake8", "isort", "pytest"]
[[package]]
name = "tls-client"
version = "0.2.1"
version = "1.0"
description = "Advanced Python HTTP Client."
optional = false
python-versions = "*"
files = [
{file = "tls_client-0.2.1-py3-none-any.whl", hash = "sha256:124a710952b979d5e20b4e2b7879b7958d6e48a259d0f5b83101055eb173f0bd"},
{file = "tls_client-0.2.1.tar.gz", hash = "sha256:473fb4c671d9d4ca6b818548ab6e955640dd589767bfce520830c5618c2f2e2b"},
{file = "tls_client-1.0-py3-none-any.whl", hash = "sha256:f1183f5e18cb31914bd62d11b350a33ea0293ea80fb91d69a3072821dece3e66"},
{file = "tls_client-1.0.tar.gz", hash = "sha256:7f6de48ad4a0ef69b72682c76ce604155971e07b4bfb2148a36276194ae3e7a0"},
]
[[package]]
@ -2445,4 +2435,4 @@ files = [
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "f966f3979873eec2c3b13460067f5aa414c69aa8ab5cd3239c1cfa564fcb5deb"
content-hash = "404a77d78066cbb2ef71015562baf44aa11d12aac29a191c1ccc7758bfda598a"

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "python-jobspy"
version = "1.1.36"
version = "1.1.37"
description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
homepage = "https://github.com/Bunsly/JobSpy"
@ -13,7 +13,7 @@ packages = [
[tool.poetry.dependencies]
python = "^3.10"
requests = "^2.31.0"
tls-client = "^0.2.1"
tls-client = "*"
beautifulsoup4 = "^4.12.2"
pandas = "^2.1.0"
NUMPY = "1.24.2"

View File

@ -88,13 +88,14 @@ class GlassdoorScraper(Scraper):
def process_job(self, job_data):
"""Processes a single job and fetches its description."""
job_id = job_data["jobview"]["job"]["listingId"]
job_url = f'{self.url}/job-listing/?jl={job_id}'
job_url = f'{self.url}job-listing/j?jl={job_id}'
if job_url in self.seen_urls:
return None
self.seen_urls.add(job_url)
job = job_data["jobview"]
title = job["job"]["jobTitleText"]
company_name = job["header"]["employerNameFromSearch"]
company_id = job_data['jobview']['header']['employer']['id']
location_name = job["header"].get("locationName", "")
location_type = job["header"].get("locationType", "")
age_in_days = job["header"].get("ageInDays")
@ -115,6 +116,7 @@ class GlassdoorScraper(Scraper):
job_post = JobPost(
title=title,
company_url=f"{self.url}Overview/W-EI_IE{company_id}.htm" if company_id else None,
company_name=company_name,
date_posted=date_posted,
job_url=job_url,
@ -258,7 +260,7 @@ class GlassdoorScraper(Scraper):
"operationName": "JobSearchResultsQuery",
"variables": {
"excludeJobListingIds": [],
"filterParams": [],
"filterParams": [{"filterKey": "applicationType", "values": "1"}] if scraper_input.easy_apply else [],
"keyword": scraper_input.search_term,
"numJobsToShow": 30,
"locationType": location_type,