lock file (#173)

pull/176/head v1.1.60
Cullen Watson 2024-07-17 21:21:22 -05:00 committed by GitHub
parent 2a0cba8c7e
commit 60d4d911c9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 638 additions and 667 deletions

View File

@@ -110,6 +110,9 @@ Optional
| |
├── country_indeed (str): ├── country_indeed (str):
| filters the country on Indeed & Glassdoor (see below for correct spelling) | filters the country on Indeed & Glassdoor (see below for correct spelling)
|
├── enforce_annual_salary (bool):
| converts wages to annual salary
``` ```
``` ```
@@ -130,42 +133,42 @@ Optional
```plaintext ```plaintext
JobPost JobPost
├── title (str) ├── title
├── company (str) ├── company
├── company_url (str) ├── company_url
├── job_url (str) ├── job_url
├── location (object) ├── location
│ ├── country (str) │ ├── country
│ ├── city (str) │ ├── city
│ ├── state (str) │ ├── state
├── description (str) ├── description
├── job_type (str): fulltime, parttime, internship, contract ├── job_type: fulltime, parttime, internship, contract
├── job_function (str) ├── job_function
├── compensation (object) │ ├── interval: yearly, monthly, weekly, daily, hourly
│ ├── interval (str): yearly, monthly, weekly, daily, hourly │ ├── min_amount
│ ├── min_amount (int) │ ├── max_amount
│ ├── max_amount (int) │ ├── currency
│ └── currency (enum) │ └── salary_source: direct_data, description (parsed from posting)
├── date_posted (date) ├── date_posted
├── emails (str) ├── emails
└── is_remote (bool) └── is_remote
Linkedin specific Linkedin specific
└── job_level (str) └── job_level
Linkedin & Indeed specific Linkedin & Indeed specific
└── company_industry (str) └── company_industry
Indeed specific Indeed specific
├── company_country (str) ├── company_country
├── company_addresses (str) ├── company_addresses
├── company_employees_label (str) ├── company_employees_label
├── company_revenue_label (str) ├── company_revenue_label
├── company_description (str) ├── company_description
├── ceo_name (str) ├── ceo_name
├── ceo_photo_url (str) ├── ceo_photo_url
├── logo_photo_url (str) ├── logo_photo_url
└── banner_photo_url (str) └── banner_photo_url
``` ```
## Supported Countries for Job Searching ## Supported Countries for Job Searching

1224
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "python-jobspy" name = "python-jobspy"
version = "1.1.59" version = "1.1.60"
description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter" description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"] authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
homepage = "https://github.com/Bunsly/JobSpy" homepage = "https://github.com/Bunsly/JobSpy"

View File

@@ -36,7 +36,7 @@ def scrape_jobs(
linkedin_company_ids: list[int] | None = None, linkedin_company_ids: list[int] | None = None,
offset: int | None = 0, offset: int | None = 0,
hours_old: int = None, hours_old: int = None,
enforce_annual_salary: bool = True, enforce_annual_salary: bool = False,
verbose: int = 2, verbose: int = 2,
**kwargs, **kwargs,
) -> pd.DataFrame: ) -> pd.DataFrame:
@@ -182,10 +182,15 @@ def scrape_jobs(
job_data["min_amount"], job_data["min_amount"],
job_data["max_amount"], job_data["max_amount"],
job_data["currency"], job_data["currency"],
) = extract_salary(job_data["description"], enforce_annual_salary=enforce_annual_salary) ) = extract_salary(
job_data["description"],
enforce_annual_salary=enforce_annual_salary,
)
job_data["salary_source"] = SalarySource.DESCRIPTION.value job_data["salary_source"] = SalarySource.DESCRIPTION.value
job_data["salary_source"] = (
job_data["salary_source"] if job_data["min_amount"] else None
)
job_df = pd.DataFrame([job_data]) job_df = pd.DataFrame([job_data])
jobs_dfs.append(job_df) jobs_dfs.append(job_df)
@@ -207,6 +212,7 @@ def scrape_jobs(
"location", "location",
"job_type", "job_type",
"date_posted", "date_posted",
"salary_source",
"interval", "interval",
"min_amount", "min_amount",
"max_amount", "max_amount",