diff --git a/src/jobspy/__init__.py b/src/jobspy/__init__.py index 11500c7..d7ed0dc 100644 --- a/src/jobspy/__init__.py +++ b/src/jobspy/__init__.py @@ -192,4 +192,4 @@ def scrape_jobs( else: jobs_formatted_df = pd.DataFrame() - return jobs_formatted_df + return jobs_formatted_df.sort_values(by='date_posted', ascending=False) diff --git a/src/jobspy/scrapers/glassdoor/__init__.py b/src/jobspy/scrapers/glassdoor/__init__.py index cf539e0..3b3ec7a 100644 --- a/src/jobspy/scrapers/glassdoor/__init__.py +++ b/src/jobspy/scrapers/glassdoor/__init__.py @@ -100,7 +100,7 @@ class GlassdoorScraper(Scraper): location_type = job["header"].get("locationType", "") age_in_days = job["header"].get("ageInDays") is_remote, location = False, None - date_posted = (datetime.now() - timedelta(days=age_in_days)).date() if age_in_days else None + date_posted = (datetime.now() - timedelta(days=age_in_days)).date() if age_in_days is not None else None if location_type == "S": is_remote = True @@ -259,12 +259,18 @@ class GlassdoorScraper(Scraper): cursor: str | None = None, ) -> str: # `fromage` is the posting time filter in days - fromage = min(scraper_input.hours_old // 24, 1) if scraper_input.hours_old else None + fromage = max(scraper_input.hours_old // 24, 1) if scraper_input.hours_old else None + filter_params = [] + if scraper_input.easy_apply: + filter_params.append({"filterKey": "applicationType", "values": "1"}) + if fromage: + filter_params.append({"filterKey": "fromAge", "values": str(fromage)}) payload = { "operationName": "JobSearchResultsQuery", + "variables": { "excludeJobListingIds": [], - "filterParams": [{"filterKey": "applicationType", "values": "1"}] if scraper_input.easy_apply else [], + "filterParams": filter_params, "keyword": scraper_input.search_term, "numJobsToShow": 30, "locationType": location_type, @@ -272,7 +278,8 @@ class GlassdoorScraper(Scraper): "parameterUrlInput": f"IL.0,12_I{location_type}{location_id}", "pageNumber": page_num, "pageCursor": cursor, - "fromAge": fromage + "fromage": fromage, + "sort": "date" }, "query": "query JobSearchResultsQuery($excludeJobListingIds: [Long!], $keyword: String, $locationId: Int, $locationType: LocationTypeEnum, $numJobsToShow: Int!, $pageCursor: String, $pageNumber: Int, $filterParams: [FilterParams], $originalPageUrl: String, $seoFriendlyUrlInput: String, $parameterUrlInput: String, $seoUrl: Boolean) {\n jobListings(\n contextHolder: {searchParams: {excludeJobListingIds: $excludeJobListingIds, keyword: $keyword, locationId: $locationId, locationType: $locationType, numPerPage: $numJobsToShow, pageCursor: $pageCursor, pageNumber: $pageNumber, filterParams: $filterParams, originalPageUrl: $originalPageUrl, seoFriendlyUrlInput: $seoFriendlyUrlInput, parameterUrlInput: $parameterUrlInput, seoUrl: $seoUrl, searchType: SR}}\n ) {\n companyFilterOptions {\n id\n shortName\n __typename\n }\n filterOptions\n indeedCtk\n jobListings {\n ...JobView\n __typename\n }\n jobListingSeoLinks {\n linkItems {\n position\n url\n __typename\n }\n __typename\n }\n jobSearchTrackingKey\n jobsPageSeoData {\n pageMetaDescription\n pageTitle\n __typename\n }\n paginationCursors {\n cursor\n pageNumber\n __typename\n }\n indexablePageForSeo\n searchResultsMetadata {\n searchCriteria {\n implicitLocation {\n id\n localizedDisplayName\n type\n __typename\n }\n keyword\n location {\n id\n shortName\n localizedShortName\n localizedDisplayName\n type\n __typename\n }\n __typename\n }\n footerVO {\n countryMenu {\n childNavigationLinks {\n id\n link\n textKey\n __typename\n }\n __typename\n }\n __typename\n }\n helpCenterDomain\n helpCenterLocale\n jobAlert {\n jobAlertExists\n __typename\n }\n jobSerpFaq {\n questions {\n answer\n question\n __typename\n }\n __typename\n }\n jobSerpJobOutlook {\n occupation\n paragraph\n __typename\n }\n showMachineReadableJobs\n __typename\n }\n serpSeoLinksVO {\n relatedJobTitlesResults\n searchedJobTitle\n searchedKeyword\n searchedLocationIdAsString\n searchedLocationSeoName\n searchedLocationType\n topCityIdsToNameResults {\n key\n value\n __typename\n }\n topEmployerIdsToNameResults {\n key\n value\n __typename\n }\n topEmployerNameResults\n topOccupationResults\n __typename\n }\n totalJobsCount\n __typename\n }\n}\n\nfragment JobView on JobListingSearchResult {\n jobview {\n header {\n adOrderId\n advertiserType\n adOrderSponsorshipLevel\n ageInDays\n divisionEmployerName\n easyApply\n employer {\n id\n name\n shortName\n __typename\n }\n employerNameFromSearch\n goc\n gocConfidence\n gocId\n jobCountryId\n jobLink\n jobResultTrackingKey\n jobTitleText\n locationName\n locationType\n locId\n needsCommission\n payCurrency\n payPeriod\n payPeriodAdjustedPay {\n p10\n p50\n p90\n __typename\n }\n rating\n salarySource\n savedJobId\n sponsored\n __typename\n }\n job {\n descriptionFragments\n importConfigId\n jobTitleId\n jobTitleText\n listingId\n __typename\n }\n jobListingAdminDetails {\n cpcVal\n importConfigId\n jobListingId\n jobSourceId\n userEligibleForAdminJobDetails\n __typename\n }\n overview {\n shortName\n squareLogoUrl\n __typename\n }\n __typename\n }\n __typename\n}\n", } diff --git a/src/jobspy/scrapers/indeed/__init__.py b/src/jobspy/scrapers/indeed/__init__.py index d9fe611..11880e9 100644 --- a/src/jobspy/scrapers/indeed/__init__.py +++ b/src/jobspy/scrapers/indeed/__init__.py @@ -364,7 +364,7 @@ class IndeedScraper(Scraper): @staticmethod def add_params(scraper_input: ScraperInput, page: int) -> dict[str, str | Any]: # `fromage` is the posting time filter in days - fromage = min(scraper_input.hours_old // 24, 1) if scraper_input.hours_old else None + fromage = max(scraper_input.hours_old // 24, 1) if scraper_input.hours_old else None params = { "q": scraper_input.search_term, "l": scraper_input.location if scraper_input.location else scraper_input.country.value[0].split(',')[-1], diff --git a/src/jobspy/scrapers/ziprecruiter/__init__.py b/src/jobspy/scrapers/ziprecruiter/__init__.py index adf7207..1e21dd5 100644 --- a/src/jobspy/scrapers/ziprecruiter/__init__.py +++ b/src/jobspy/scrapers/ziprecruiter/__init__.py @@ -165,6 +165,9 @@ class ZipRecruiterScraper(Scraper): "search": scraper_input.search_term, "location": scraper_input.location, } + if scraper_input.hours_old: + fromage = max(scraper_input.hours_old // 24, 1) if scraper_input.hours_old else None + params['days'] = fromage job_type_value = None if scraper_input.job_type: if scraper_input.job_type.value == "fulltime":