mirror of
https://github.com/Bunsly/JobSpy.git
synced 2026-03-05 12:04:33 -08:00
refactor:organize code
This commit is contained in:
41
jobspy/google/util.py
Normal file
41
jobspy/google/util.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import re
|
||||
|
||||
from jobspy.util import create_logger
|
||||
|
||||
log = create_logger("Google")
|
||||
|
||||
|
||||
def find_job_info(jobs_data: list | dict) -> list | None:
|
||||
"""Iterates through the JSON data to find the job listings"""
|
||||
if isinstance(jobs_data, dict):
|
||||
for key, value in jobs_data.items():
|
||||
if key == "520084652" and isinstance(value, list):
|
||||
return value
|
||||
else:
|
||||
result = find_job_info(value)
|
||||
if result:
|
||||
return result
|
||||
elif isinstance(jobs_data, list):
|
||||
for item in jobs_data:
|
||||
result = find_job_info(item)
|
||||
if result:
|
||||
return result
|
||||
return None
|
||||
|
||||
|
||||
def find_job_info_initial_page(html_text: str):
|
||||
pattern = f'520084652":(' + r"\[.*?\]\s*])\s*}\s*]\s*]\s*]\s*]\s*]"
|
||||
results = []
|
||||
matches = re.finditer(pattern, html_text)
|
||||
|
||||
import json
|
||||
|
||||
for match in matches:
|
||||
try:
|
||||
parsed_data = json.loads(match.group(1))
|
||||
results.append(parsed_data)
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
log.error(f"Failed to parse match: {str(e)}")
|
||||
results.append({"raw_match": match.group(0), "error": str(e)})
|
||||
return results
|
||||
Reference in New Issue
Block a user