mirror of https://github.com/Bunsly/JobSpy
init commit
models ready, logic file for goozali based on the linkedin scraper
pull/231/head
parent f0ea89b357
commit 3dc15195d5
@@ -1,16 +1,26 @@
import asyncio

from enum import Enum

from db.job_repository import JobRepository
from jobspy import scrape_jobs
from jobspy.telegram_bot import TelegramBot


class Site(Enum):
    LINKEDIN = "linkedin"
    GOOZALI = "goozali"
    INDEED = "indeed"
    ZIP_RECRUITER = "zip_recruiter"
    GLASSDOOR = "glassdoor"
    GOOGLE = "google"


async def main():
    telegramBot = TelegramBot()
    jobRepository = JobRepository()

    jobs = scrape_jobs(
        # site_name=["indeed", "linkedin", "zip_recruiter", "glassdoor", "google"],
        # site_name=["indeed"],  # commented out: a second active site_name kwarg below would be a SyntaxError
        # site_name=[Site.LINKEDIN, Site.GOOZALI, Site.GLASSDOOR, Site.INDEED],
        site_name=[Site.GOOZALI],
        search_term="software engineer",
        google_search_term="software engineer jobs near Tel Aviv Israel since yesterday",
        location="Central, Israel",
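For orientation (the hunk above is truncated mid-call by the diff): an async main() like this is normally driven with asyncio.run at module scope — a minimal sketch, not part of the commit:

# Hypothetical entry point for the main() coroutine defined above.
if __name__ == "__main__":
    asyncio.run(main())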
@@ -2,6 +2,8 @@ from __future__ import annotations

from abc import ABC, abstractmethod

from jobspy.main import Site

from ..jobs import (
    Enum,
    BaseModel,
@@ -12,14 +14,6 @@ from ..jobs import (
)


class Site(Enum):
    LINKEDIN = "linkedin"
    INDEED = "indeed"
    ZIP_RECRUITER = "zip_recruiter"
    GLASSDOOR = "glassdoor"
    GOOGLE = "google"


class SalarySource(Enum):
    DIRECT_DATA = "direct_data"
    DESCRIPTION = "description"
@@ -0,0 +1,439 @@
"""
jobspy.scrapers.Goozali
~~~~~~~~~~~~~~~~~~~~~~~

This module contains routines to scrape Goozali.
"""

from __future__ import annotations

import math
import random
import time
from datetime import datetime
from typing import Optional

import regex as re
from bs4 import BeautifulSoup
from bs4.element import Tag
from urllib.parse import urlparse, urlunparse, unquote
from requests.exceptions import RetryError, RequestException
from urllib3.exceptions import MaxRetryError

from .constants import headers
from .. import Scraper, ScraperInput, Site
from ..exceptions import GoozaliException
from ..utils import (
    create_logger,
    create_session,
    currency_parser,
    extract_emails_from_text,
    get_enum_from_job_type,
    markdown_converter,
    remove_attributes,
)
from ...jobs import (
    JobPost,
    Location,
    JobResponse,
    JobType,
    Country,
    Compensation,
    DescriptionFormat,
)

logger = create_logger("Goozali")


class GoozaliScraper(Scraper):
    base_url = "https://www.Goozali.com"
    delay = 3
    band_delay = 4
    jobs_per_page = 25

    def __init__(
        self, proxies: list[str] | str | None = None, ca_cert: str | None = None
    ):
        """
        Initializes GoozaliScraper with the Goozali job search url
        """
        super().__init__(Site.GOOZALI, proxies=proxies, ca_cert=ca_cert)
        self.session = create_session(
            proxies=self.proxies,
            ca_cert=ca_cert,
            is_tls=False,
            has_retry=True,
            delay=5,
            clear_cookies=True,
        )
        self.session.headers.update(headers)
        self.scraper_input = None
        self.country = "worldwide"
        self.job_url_direct_regex = re.compile(r'(?<=\?url=)[^"]+')

    def scrape(self, scraper_input: ScraperInput) -> JobResponse:
        """
        Scrapes Goozali for jobs with scraper_input criteria
        :param scraper_input:
        :return: job_response
        """
        self.scraper_input = scraper_input
        job_list: list[JobPost] = []
        seen_ids = set()
        start = scraper_input.offset // 10 * 10 if scraper_input.offset else 0
        request_count = 0
        seconds_old = (
            scraper_input.hours_old * 3600 if scraper_input.hours_old else None
        )
        continue_search = (
            lambda: len(job_list) < scraper_input.results_wanted and start < 1000
        )
        for location in scraper_input.locations:
            logger.info(f"start searching for location: {location}")
            while continue_search():
                request_count += 1
                logger.info(
                    f"search page: {request_count} / {math.ceil(scraper_input.results_wanted / 10)}"
                )
                params = {
                    "keywords": scraper_input.search_term,
                    "location": location,
                    "distance": scraper_input.distance,
                    "f_WT": 2 if scraper_input.is_remote else None,
                    "f_JT": (
                        self.job_type_code(scraper_input.job_type)
                        if scraper_input.job_type
                        else None
                    ),
                    "pageNum": 0,
                    "start": start,
                    "f_AL": "true" if scraper_input.easy_apply else None,
                    "f_C": (
                        ",".join(map(str, scraper_input.Goozali_company_ids))
                        if scraper_input.Goozali_company_ids
                        else None
                    ),
                }
                if seconds_old is not None:
                    params["f_TPR"] = f"r{seconds_old}"

                params = {k: v for k, v in params.items() if v is not None}
                try:
                    response = self.session.get(
                        f"{self.base_url}/jobs-guest/jobs/api/seeMoreJobPostings/search?",
                        params=params,
                        timeout=10,
                    )
                    if response.status_code not in range(200, 400):
                        if response.status_code == 429:
                            err = "429 Response - Blocked by Goozali for too many requests"
                        else:
                            err = f"Goozali response status code {response.status_code}"
                            err += f" - {response.text}"
                        logger.error(err)
                        return JobResponse(jobs=job_list)
                except MaxRetryError as e:
                    # Raised when the maximum number of retries is exceeded.
                    logger.error(f"MaxRetryError: {str(e)}")
                    logger.error(f"MaxRetryError for location: {location}")
                    break
                except RetryError as e:
                    # Custom retry logic failed.
                    logger.error(f"RetryError: {str(e)}")
                    logger.error(f"RetryError for location: {location}")
                    break
                except Exception as e:
                    if "Proxy responded with" in str(e):
                        logger.error("Goozali: Bad proxy")
                    else:
                        logger.error(f"Goozali: {str(e)}")
                    return JobResponse(jobs=job_list)
                soup = BeautifulSoup(response.text, "html.parser")
                job_cards = soup.find_all("div", class_="base-search-card")
                if len(job_cards) == 0:
                    break

                for job_card in job_cards:
                    href_tag = job_card.find("a", class_="base-card__full-link")
                    if href_tag and "href" in href_tag.attrs:
                        href = href_tag.attrs["href"].split("?")[0]
                        job_id = href.split("-")[-1]

                        if job_id in seen_ids:
                            continue
                        seen_ids.add(job_id)

                        try:
                            fetch_desc = scraper_input.Goozali_fetch_description
                            job_post = self._process_job(job_card, job_id, fetch_desc)
                            if job_post:
                                job_list.append(job_post)
                            if not continue_search():
                                break
                        except Exception as e:
                            raise GoozaliException(str(e))

                if continue_search():
                    time.sleep(random.uniform(self.delay, self.delay + self.band_delay))
                    start += len(job_list)

        job_list = job_list[: scraper_input.results_wanted]
        return JobResponse(jobs=job_list)
    def _process_job(
        self, job_card: Tag, job_id: str, full_descr: bool
    ) -> Optional[JobPost]:
        salary_tag = job_card.find("span", class_="job-search-card__salary-info")

        compensation = None
        if salary_tag:
            salary_text = salary_tag.get_text(separator=" ").strip()
            salary_values = [currency_parser(value) for value in salary_text.split("-")]
            salary_min = salary_values[0]
            salary_max = salary_values[1]
            currency = salary_text[0] if salary_text[0] != "$" else "USD"

            compensation = Compensation(
                min_amount=int(salary_min),
                max_amount=int(salary_max),
                currency=currency,
            )

        title_tag = job_card.find("span", class_="sr-only")
        title = title_tag.get_text(strip=True) if title_tag else "N/A"

        company_tag = job_card.find("h4", class_="base-search-card__subtitle")
        company_a_tag = company_tag.find("a") if company_tag else None
        company_url = (
            urlunparse(urlparse(company_a_tag.get("href"))._replace(query=""))
            if company_a_tag and company_a_tag.has_attr("href")
            else ""
        )
        company = company_a_tag.get_text(strip=True) if company_a_tag else "N/A"

        metadata_card = job_card.find("div", class_="base-search-card__metadata")
        location = self._get_location(metadata_card)

        datetime_tag = (
            metadata_card.find("time", class_="job-search-card__listdate")
            if metadata_card
            else None
        )
        date_posted = None
        if datetime_tag and "datetime" in datetime_tag.attrs:
            datetime_str = datetime_tag["datetime"]
            try:
                date_posted = datetime.strptime(datetime_str, "%Y-%m-%d")
            except ValueError:
                date_posted = None
        job_details = {}
        if full_descr:
            job_details = self._get_job_details(job_id)

        return JobPost(
            id=f"li-{job_id}",
            title=title,
            company_name=company,
            company_url=company_url,
            location=location,
            date_posted=date_posted,
            datetime_posted=date_posted,
            job_url=f"{self.base_url}/jobs/view/{job_id}",
            compensation=compensation,
            job_type=job_details.get("job_type"),
            job_level=job_details.get("job_level", "").lower(),
            company_industry=job_details.get("company_industry"),
            description=job_details.get("description"),
            job_url_direct=job_details.get("job_url_direct"),
            emails=extract_emails_from_text(job_details.get("description")),
            company_logo=job_details.get("company_logo"),
            job_function=job_details.get("job_function"),
        )
    def _get_job_details(self, job_id: str) -> dict:
        """
        Retrieves the job description and other details from the job page
        :param job_id:
        :return: dict
        """
        try:
            response = self.session.get(
                f"{self.base_url}/jobs/view/{job_id}", timeout=5
            )
            response.raise_for_status()
        except RequestException:
            return {}
        if "Goozali.com/signup" in response.url:
            return {}

        soup = BeautifulSoup(response.text, "html.parser")
        div_content = soup.find(
            "div", class_=lambda x: x and "show-more-less-html__markup" in x
        )
        description = None
        if div_content is not None:
            div_content = remove_attributes(div_content)
            description = div_content.prettify(formatter="html")
            if self.scraper_input.description_format == DescriptionFormat.MARKDOWN:
                description = markdown_converter(description)

        h3_tag = soup.find(
            "h3", text=lambda text: text and "Job function" in text.strip()
        )

        job_function = None
        if h3_tag:
            job_function_span = h3_tag.find_next(
                "span", class_="description__job-criteria-text"
            )
            if job_function_span:
                job_function = job_function_span.text.strip()

        company_logo = (
            logo_image.get("data-delayed-url")
            if (logo_image := soup.find("img", {"class": "artdeco-entity-image"}))
            else None
        )
        return {
            "description": description,
            "job_level": self._parse_job_level(soup),
            "company_industry": self._parse_company_industry(soup),
            "job_type": self._parse_job_type(soup),
            "job_url_direct": self._parse_job_url_direct(soup),
            "company_logo": company_logo,
            "job_function": job_function,
        }
    def _get_location(self, metadata_card: Optional[Tag]) -> Location:
        """
        Extracts the location data from the job metadata card.
        :param metadata_card:
        :return: location
        """
        location = Location(country=Country.from_string(self.country))
        if metadata_card is not None:
            location_tag = metadata_card.find(
                "span", class_="job-search-card__location"
            )
            location_string = location_tag.text.strip() if location_tag else "N/A"
            parts = location_string.split(", ")
            if len(parts) == 2:
                city, state = parts
                location = Location(
                    city=city,
                    state=state,
                    country=Country.from_string(self.country),
                )
            elif len(parts) == 3:
                city, state, country = parts
                country = Country.from_string(country)
                location = Location(city=city, state=state, country=country)
        return location
    @staticmethod
    def _parse_job_type(soup_job_type: BeautifulSoup) -> list[JobType] | None:
        """
        Gets the job type from the job page
        :param soup_job_type:
        :return: JobType
        """
        h3_tag = soup_job_type.find(
            "h3",
            class_="description__job-criteria-subheader",
            string=lambda text: "Employment type" in text,
        )
        employment_type = None
        if h3_tag:
            employment_type_span = h3_tag.find_next_sibling(
                "span",
                class_="description__job-criteria-text description__job-criteria-text--criteria",
            )
            if employment_type_span:
                employment_type = employment_type_span.get_text(strip=True)
                employment_type = employment_type.lower()
                employment_type = employment_type.replace("-", "")

        return [get_enum_from_job_type(employment_type)] if employment_type else []
    @staticmethod
    def _parse_job_level(soup_job_level: BeautifulSoup) -> str | None:
        """
        Gets the job level from the job page
        :param soup_job_level:
        :return: str
        """
        h3_tag = soup_job_level.find(
            "h3",
            class_="description__job-criteria-subheader",
            string=lambda text: "Seniority level" in text,
        )
        job_level = None
        if h3_tag:
            job_level_span = h3_tag.find_next_sibling(
                "span",
                class_="description__job-criteria-text description__job-criteria-text--criteria",
            )
            if job_level_span:
                job_level = job_level_span.get_text(strip=True)

        return job_level
    @staticmethod
    def _parse_company_industry(soup_industry: BeautifulSoup) -> str | None:
        """
        Gets the company industry from the job page
        :param soup_industry:
        :return: str
        """
        h3_tag = soup_industry.find(
            "h3",
            class_="description__job-criteria-subheader",
            string=lambda text: "Industries" in text,
        )
        industry = None
        if h3_tag:
            industry_span = h3_tag.find_next_sibling(
                "span",
                class_="description__job-criteria-text description__job-criteria-text--criteria",
            )
            if industry_span:
                industry = industry_span.get_text(strip=True)

        return industry
    def _parse_job_url_direct(self, soup: BeautifulSoup) -> str | None:
        """
        Gets the direct job url from the job page
        :param soup:
        :return: str
        """
        job_url_direct = None
        job_url_direct_content = soup.find("code", id="applyUrl")
        if job_url_direct_content:
            job_url_direct_match = self.job_url_direct_regex.search(
                job_url_direct_content.decode_contents().strip()
            )
            if job_url_direct_match:
                job_url_direct = unquote(job_url_direct_match.group())

        return job_url_direct
    @staticmethod
    def job_type_code(job_type_enum: JobType) -> str:
        return {
            JobType.FULL_TIME: "F",
            JobType.PART_TIME: "P",
            JobType.INTERNSHIP: "I",
            JobType.CONTRACT: "C",
            JobType.TEMPORARY: "T",
        }.get(job_type_enum, "")
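The scraper above implements the shared Scraper interface, so it can be exercised on its own — a minimal sketch, assuming ScraperInput exposes the fields scrape() reads (search_term, locations, results_wanted) and that its constructor accepts them by name; none of this signature is confirmed by the commit:

# Illustrative only: drive GoozaliScraper without going through scrape_jobs().
# The ScraperInput field names mirror the attributes scrape() accesses above;
# the exact constructor signature is an assumption.
from jobspy.scrapers import ScraperInput, Site
from jobspy.scrapers.goozali import GoozaliScraper

scraper = GoozaliScraper()
job_response = scraper.scrape(
    ScraperInput(
        site_type=[Site.GOOZALI],
        search_term="software engineer",
        locations=["Central, Israel"],
        results_wanted=10,
    )
)
for job in job_response.jobs:
    print(job.title, job.job_url)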
@@ -0,0 +1,8 @@
headers = {
    # NOTE: carried over from the LinkedIn scraper this module is based on;
    # the authority still points at www.linkedin.com.
    "authority": "www.linkedin.com",
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-language": "en-US,en;q=0.9",
    "cache-control": "max-age=0",
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
}
@@ -0,0 +1,20 @@
from typing import Optional

from jobspy.scrapers.goozali.model import GoozaliColumnTypeOptions


class GoozaliColumn:
    def __init__(self, id: str, name: str, description: Optional[str], type: str,
                 typeOptions: GoozaliColumnTypeOptions, default: Optional[str],
                 initialCreatedTime: str, initialCreatedByUserId: str,
                 lastModifiedTime: str, lastModifiedByUserId: str, isEditableFromSync: bool):
        self.id = id
        self.name = name
        self.description = description
        self.type = type
        self.typeOptions = typeOptions
        self.default = default
        self.initialCreatedTime = initialCreatedTime
        self.initialCreatedByUserId = initialCreatedByUserId
        self.lastModifiedTime = lastModifiedTime
        self.lastModifiedByUserId = lastModifiedByUserId
        self.isEditableFromSync = isEditableFromSync
@@ -0,0 +1,8 @@
from typing import Optional


class GoozaliColumnChoice:
    def __init__(self, id: str, name: str, color: Optional[str] = None):
        self.id = id
        self.name = name
        self.color = color
@@ -0,0 +1,10 @@
from typing import Dict, List

from jobspy.scrapers.goozali.model.GoozaliColumnChoice import GoozaliColumnChoice


class GoozaliColumnTypeOptions:
    def __init__(self, choiceOrder: List[str], choices: Dict[str, GoozaliColumnChoice], disableColors: bool):
        self.choiceOrder = choiceOrder
        self.choices = choices
        self.disableColors = disableColors
@@ -0,0 +1,7 @@
from jobspy.scrapers.goozali.model.GozaaliResponseData import GoozaliResponseData


class GoozaliResponse:
    def __init__(self, msg: str, data: GoozaliResponseData):
        self.msg = msg
        self.data = data
@@ -0,0 +1,8 @@
from typing import Dict, List


class GoozaliRow:
    def __init__(self, id: str, createdTime: str, cellValuesByColumnId: Dict[str, List[str]]):
        self.id = id
        self.createdTime = createdTime
        self.cellValuesByColumnId = cellValuesByColumnId
@@ -0,0 +1,17 @@
from typing import Dict, List

from jobspy.scrapers.goozali.model.GoozaliColumn import GoozaliColumn
from jobspy.scrapers.goozali.model.GoozaliRow import GoozaliRow


class GoozaliTable:
    def __init__(self, applicationId: str, id: str, name: str, columns: List[GoozaliColumn], primaryColumnId: str,
                 meaningfulColumnOrder: List[Dict[str, str]], viewOrder: List[str], rows: List[GoozaliRow]):
        self.applicationId = applicationId
        self.id = id
        self.name = name
        self.columns = columns
        self.primaryColumnId = primaryColumnId
        self.meaningfulColumnOrder = meaningfulColumnOrder
        self.viewOrder = viewOrder
        self.rows = rows
@@ -0,0 +1,6 @@
from jobspy.scrapers.goozali.model import GoozaliTable


class GoozaliResponseData:
    def __init__(self, table: GoozaliTable):
        self.table = table
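Taken together, these model classes mirror one nested JSON payload (msg → data → table → columns/rows). A minimal sketch of how the rows of such a payload could be mapped onto GoozaliRow — the JSON key names are assumed to match the constructor parameter names, as in the mock response in the test below:

# Illustrative only: map the rows of a raw Goozali payload onto GoozaliRow.
# Assumes keys named exactly like the constructor parameters (see the mock
# data in the test below); not confirmed against the real endpoint.
import json

from jobspy.scrapers.goozali.model.GoozaliRow import GoozaliRow


def parse_rows(raw_payload: str) -> list[GoozaliRow]:
    payload = json.loads(raw_payload)
    return [
        GoozaliRow(
            id=row["id"],
            createdTime=row["createdTime"],
            cellValuesByColumnId=row.get("cellValuesByColumnId", {}),
        )
        for row in payload["data"]["table"]["rows"]
    ]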
@@ -0,0 +1,68 @@
from jobspy import scrape_jobs
import pandas as pd

from jobspy.scrapers.goozali.model import GoozaliColumnTypeOptions, GoozaliResponse, GoozaliTable
from jobspy.scrapers.goozali.model.GoozaliColumn import GoozaliColumn
from jobspy.scrapers.goozali.model.GoozaliColumnChoice import GoozaliColumnChoice
from jobspy.scrapers.goozali.model.GoozaliRow import GoozaliRow
from jobspy.scrapers.goozali.model.GozaaliResponseData import GoozaliResponseData


def test_goozali():
    # NOTE: currently exercises the glassdoor scraper, not goozali.
    result = scrape_jobs(
        site_name="glassdoor",
        search_term="engineer",
        results_wanted=5,
    )
    assert (
        isinstance(result, pd.DataFrame) and len(result) == 5
    ), "Result should be a non-empty DataFrame"


def createMockGoozaliResponse() -> GoozaliResponse:
    data = GoozaliResponseData(table=GoozaliTable(
        applicationId="app7OQjqEzTtCRq7u",
        id="tblBQjp5Aw6O172VY",
        name="Shared view table",
        columns=[
            GoozaliColumn(
                id="fldIf9DbRpNRLJXuD",
                name="Industry",
                description=None,
                type="multiSelect",
                typeOptions=GoozaliColumnTypeOptions(
                    choiceOrder=["selcE6QUv4vWIIcZR", "sel0JIQKMmz3jCFUN", "selzhpwlfPssG4OEx"],
                    choices={
                        "selwhDNBom2dZJkgv": GoozaliColumnChoice(id="selwhDNBom2dZJkgv", name="HealthTech", color="orange"),
                        "selReHesNOVD3PvCo": GoozaliColumnChoice(id="selReHesNOVD3PvCo", name="Automotive", color="pink"),
                    },
                    disableColors=False,
                ),
                default=None,
                initialCreatedTime="2022-12-29T10:23:21.000Z",
                initialCreatedByUserId="usr1fVy2RIyCuGHec",
                lastModifiedTime="2024-07-21T09:30:02.000Z",
                lastModifiedByUserId="usr1fVy2RIyCuGHec",
                isEditableFromSync=False,
            )
        ],
        primaryColumnId="fldLT11B0cpV6p9Uz",
        meaningfulColumnOrder=[
            {"columnId": "fldLT11B0cpV6p9Uz", "visibility": True},
            {"columnId": "fldIf9DbRpNRLJXuD", "visibility": True, "width": 368},
            {"columnId": "fldOLt34j8Pm2dcCq", "visibility": True, "width": 182},
        ],
        viewOrder=["viwNRSqqmqZLP0a3C"],
        rows=[
            GoozaliRow(
                id="recwiKgHT9mJrqoxa",
                createdTime="2023-01-09T10:32:09.000Z",
                cellValuesByColumnId={
                    "fldLT11B0cpV6p9Uz": ["3M"],
                    "fldIf9DbRpNRLJXuD": ["selwhDNBom2dZJkgv", "selReHesNOVD3PvCo"],
                },
            )
        ],
    ))
    return GoozaliResponse(msg="SUCCESS", data=data)