Removed the code that parsed the data to CSV

2024-12-24 11:15:07 +02:00 · 2024-12-24 11:15:07 +02:00 · 54022f2b57
parent 79189f49ef
commit 54022f2b57
1 changed file with 0 additions and 132 deletions
--- a/src/jobspy/__init__.py
+++ b/src/jobspy/__init__.py
@@ -149,135 +149,3 @@ def scrape_jobs(
                logger.error(f"Future Error occurred: {e}")

    return merged_jobs
-
-    def convert_to_annual(job_data: dict):
-        if job_data["interval"] == "hourly":
-            job_data["min_amount"] *= 2080
-            job_data["max_amount"] *= 2080
-        if job_data["interval"] == "monthly":
-            job_data["min_amount"] *= 12
-            job_data["max_amount"] *= 12
-        if job_data["interval"] == "weekly":
-            job_data["min_amount"] *= 52
-            job_data["max_amount"] *= 52
-        if job_data["interval"] == "daily":
-            job_data["min_amount"] *= 260
-            job_data["max_amount"] *= 260
-        job_data["interval"] = "yearly"
-
-    jobs_dfs: list[pd.DataFrame] = []
-
-    for site, job_response in site_to_jobs_dict.items():
-        for job in job_response.jobs:
-            job_data = job.dict()
-            job_url = job_data["job_url"]
-            job_data["job_url_hyper"] = f'<a href="{job_url}">{job_url}</a>'
-            job_data["site"] = site
-            job_data["company"] = job_data["company_name"]
-            job_data["job_type"] = (
-                ", ".join(job_type.value[0]
-                          for job_type in job_data["job_type"])
-                if job_data["job_type"]
-                else None
-            )
-            job_data["emails"] = (
-                ", ".join(job_data["emails"]) if job_data["emails"] else None
-            )
-            if job_data["location"]:
-                job_data["location"] = Location(
-                    **job_data["location"]
-                ).display_location()
-
-            compensation_obj = job_data.get("compensation")
-            if compensation_obj and isinstance(compensation_obj, dict):
-                job_data["interval"] = (
-                    compensation_obj.get("interval").value
-                    if compensation_obj.get("interval")
-                    else None
-                )
-                job_data["min_amount"] = compensation_obj.get("min_amount")
-                job_data["max_amount"] = compensation_obj.get("max_amount")
-                job_data["currency"] = compensation_obj.get("currency", "USD")
-                job_data["salary_source"] = SalarySource.DIRECT_DATA.value
-                if enforce_annual_salary and (
-                    job_data["interval"]
-                    and job_data["interval"] != "yearly"
-                    and job_data["min_amount"]
-                    and job_data["max_amount"]
-                ):
-                    convert_to_annual(job_data)
-
-            else:
-                if country_enum == Country.USA:
-                    (
-                        job_data["interval"],
-                        job_data["min_amount"],
-                        job_data["max_amount"],
-                        job_data["currency"],
-                    ) = extract_salary(
-                        job_data["description"],
-                        enforce_annual_salary=enforce_annual_salary,
-                    )
-                    job_data["salary_source"] = SalarySource.DESCRIPTION.value
-
-            job_data["salary_source"] = (
-                job_data["salary_source"]
-                if "min_amount" in job_data and job_data["min_amount"]
-                else None
-            )
-            job_df = pd.DataFrame([job_data])
-            jobs_dfs.append(job_df)
-
-    if jobs_dfs:
-        # Step 1: Filter out all-NA columns from each DataFrame before concatenation
-        filtered_dfs = [df.dropna(axis=1, how="all") for df in jobs_dfs]
-
-        # Step 2: Concatenate the filtered DataFrames
-        jobs_df = pd.concat(filtered_dfs, ignore_index=True)
-
-        # Desired column order
-        desired_order = [
-            "id",
-            "site",
-            "job_url_hyper" if hyperlinks else "job_url",
-            "job_url_direct",
-            "title",
-            "company",
-            "location",
-            "date_posted",
-            "job_type",
-            "salary_source",
-            "interval",
-            "min_amount",
-            "max_amount",
-            "currency",
-            "is_remote",
-            "job_level",
-            "job_function",
-            "listing_type",
-            "emails",
-            "description",
-            "company_industry",
-            "company_url",
-            "company_logo",
-            "company_url_direct",
-            "company_addresses",
-            "company_num_employees",
-            "company_revenue",
-            "company_description",
-        ]
-
-        # Step 3: Ensure all desired columns are present, adding missing ones as empty
-        for column in desired_order:
-            if column not in jobs_df.columns:
-                jobs_df[column] = None  # Add missing columns as empty
-
-        # Reorder the DataFrame according to the desired order
-        jobs_df = jobs_df[desired_order]
-
-        # Step 4: Sort the DataFrame as required
-        return jobs_df.sort_values(
-            by=["site", "date_posted"], ascending=[True, False]
-        ).reset_index(drop=True)
-    else:
-        return pd.DataFrame()