remove postal code (#17)

* fix: remove postal code
pull/20/head
Cullen Watson 2023-08-26 22:09:04 -05:00 committed by GitHub
parent d72d14db02
commit 33f6768ec8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 13 additions and 21 deletions

View File

@ -53,8 +53,6 @@ site_type (enum)
│ │ ├── country (str) │ │ ├── country (str)
│ │ ├── city (str) │ │ ├── city (str)
│ │ ├── state (str) │ │ ├── state (str)
│ │ ├── postal_code (str)
│ │ └── address (str)
│ ├── description (str) │ ├── description (str)
│ ├── job_type (enum) │ ├── job_type (enum)
│ ├── compensation (object) │ ├── compensation (object)
@ -79,11 +77,9 @@ site_type (enum)
"company_name": "INTEL", "company_name": "INTEL",
"job_url": "https://www.indeed.com/jobs/viewjob?jk=a2cfbb98d2002228", "job_url": "https://www.indeed.com/jobs/viewjob?jk=a2cfbb98d2002228",
"location": { "location": {
"country": "US", "country": "USA",
"city": "Austin", "city": "Austin",
"state": "TX", "state": "TX",
"postal_code": null,
"address": null
}, },
"description": "Job Description Designs, develops, tests, and debugs...", "description": "Job Description Designs, develops, tests, and debugs...",
"job_type": "fulltime", "job_type": "fulltime",
@ -95,7 +91,10 @@ site_type (enum)
}, },
"date_posted": "2023-08-18T00:00:00" "date_posted": "2023-08-18T00:00:00"
}, ... }, ...
] ],
"total_results": 845,
"returned_results": 15
},
"linkedin": { "linkedin": {
"success": true, "success": true,
"error": null, "error": null,
@ -105,11 +104,9 @@ site_type (enum)
"company_name": "Public Partnerships | PPL", "company_name": "Public Partnerships | PPL",
"job_url": "https://www.linkedin.com/jobs/view/3690013792", "job_url": "https://www.linkedin.com/jobs/view/3690013792",
"location": { "location": {
"country": "US", "country": "USA",
"city": "Austin", "city": "Austin",
"state": "TX", "state": "TX",
"postal_code": null,
"address": null
}, },
"description": "Public Partnerships LLC supports individuals with disabilities...", "description": "Public Partnerships LLC supports individuals with disabilities...",
"job_type": null, "job_type": null,

View File

@ -19,11 +19,9 @@ class JobType(Enum):
class Location(BaseModel): class Location(BaseModel):
country: str country: str = "USA"
city: str = None city: str = None
state: str = None state: str = None
postal_code: str = None
address: str = None
class CompensationInterval(Enum): class CompensationInterval(Enum):
@ -38,7 +36,7 @@ class Compensation(BaseModel):
interval: CompensationInterval interval: CompensationInterval
min_amount: float min_amount: float
max_amount: float max_amount: float
currency: str = "US" currency: str = "USA"
class JobPost(BaseModel): class JobPost(BaseModel):

View File

@ -125,7 +125,6 @@ class IndeedScraper(Scraper):
city=job.get("jobLocationCity"), city=job.get("jobLocationCity"),
state=job.get("jobLocationState"), state=job.get("jobLocationState"),
postal_code=job.get("jobLocationPostal"), postal_code=job.get("jobLocationPostal"),
country="US",
), ),
job_type=job_type, job_type=job_type,
compensation=compensation, compensation=compensation,

View File

@ -167,9 +167,6 @@ class LinkedInScraper(Scraper):
:param metadata_card :param metadata_card
:return: location :return: location
""" """
location = Location(
country="US",
)
if metadata_card is not None: if metadata_card is not None:
location_tag = metadata_card.find( location_tag = metadata_card.find(
"span", class_="job-search-card__location" "span", class_="job-search-card__location"
@ -179,7 +176,6 @@ class LinkedInScraper(Scraper):
if len(parts) == 2: if len(parts) == 2:
city, state = parts city, state = parts
location = Location( location = Location(
country="US",
city=city, city=city,
state=state, state=state,
) )

View File

@ -292,7 +292,6 @@ class ZipRecruiterScraper(Scraper):
else: else:
city, state = None, None city, state = None, None
return Location( return Location(
country="US",
city=city, city=city,
state=state, state=state,
) )

View File

@ -23,6 +23,7 @@ async def scrape_jobs(scraper_input: ScraperInput) -> Dict[str, JobResponse]:
:param scraper_input: :param scraper_input:
:return: Dict[str, JobResponse]: where each key is a site :return: Dict[str, JobResponse]: where each key is a site
""" """
def scrape_site(site: Site) -> Tuple[str, JobResponse]: def scrape_site(site: Site) -> Tuple[str, JobResponse]:
scraper_class = SCRAPER_MAPPING[site] scraper_class = SCRAPER_MAPPING[site]
scraper = scraper_class() scraper = scraper_class()
@ -30,7 +31,9 @@ async def scrape_jobs(scraper_input: ScraperInput) -> Dict[str, JobResponse]:
return (site.value, scraped_data) return (site.value, scraped_data)
with ThreadPoolExecutor() as executor: with ThreadPoolExecutor() as executor:
resp_dict = {site: resp for site, resp in executor.map(scrape_site, scraper_input.site_type)} resp_dict = {
site: resp
for site, resp in executor.map(scrape_site, scraper_input.site_type)
}
return resp_dict return resp_dict