harness-community · rssnyder · Sep 17, 2025 · Sep 17, 2025 · Sep 17, 2025 · Sep 17, 2025
diff --git a/.gitignore b/.gitignore
@@ -198,4 +198,6 @@ cython_debug/
 
 # terraform modules
 *terraform*
-builds
+builds
+
+test.py
diff --git a/README.md b/README.md
@@ -199,3 +199,22 @@ pull the example focus csv: `curl -LO https://raw.githubusercontent.com/FinOps-O
 install [poetry](https://python-poetry.org/docs/#installation)
 
 testing: `make test`
+
+## built-in focus conversions
+
+the package ships platform-specific subclasses of the `Focus` object that handle special cases in a platform's billing export; users may contribute additional subclasses for other platforms.
+
+### mongodb atlas
+
+```python
+from os import getenv
+from harness_ccm_external_data import MongoDBAtlas
+
+atlas = MongoDBAtlas(
+    "MongoDB Atlas",
+    "My Company Inc.",
+    "usage-summary-8765434567887656789-20250201.csv",
+    harness_account_id=getenv("HARNESS_ACCOUNT_ID"),
+    harness_platform_api_key=getenv("HARNESS_PLATFORM_API_KEY"),
+)
+atlas.upload()
+```
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,12 +1,12 @@
 [project]
 name = "harness-ccm-external-data"
-version = "0.1.6"
+version = "0.2.0-rc.1"
 description = "Tools to help manage external data for Harness CCM"
 authors = [
     {name = "Riley Snyder",email = "riley.snyder@harness.io"}
 ]
 readme = "README.md"
-requires-python = ">=3.8"
+requires-python = ">=3.9"
 dependencies = [
     "pandas (>=2.3.0,<3.0.0)",
     "requests (>=2.32.4,<3.0.0)"

diff --git a/src/harness_ccm_external_data/__init__.py b/src/harness_ccm_external_data/__init__.py
@@ -1 +1,2 @@
 from .focus_data import Focus, HARNESS_FIELDS
+from .mongodb_atlas import MongoDBAtlas
diff --git a/src/harness_ccm_external_data/focus_data.py b/src/harness_ccm_external_data/focus_data.py
@@ -72,10 +72,12 @@ def __init__(
     ):
         self.provider = provider  # cloud platform
         self.data_source = data_source  # instance of this cloud platform
+        self.source = source
         self.provider_type = provider_type
         self.invoice_period = invoice_period
         self.provider_uuid = provider_uuid
-        self.source = source
+        self.separator = separator
+        self.skip_rows = skip_rows
         self.cost_multiplier = cost_multiplier
         self.converters = converters
         self.additional_columns = {}
@@ -90,6 +92,7 @@ def __init__(
         self.harness_account_id = harness_account_id
 
         # Stub for generated content
+        self.billing_content: pd.DataFrame = None
         self.harness_focus_content: pd.DataFrame = None
 
         # sanitize mappings
@@ -122,39 +125,43 @@ def __init__(
                     )
                     del self.mapping[field]
 
-        baseline_converters = {
+        self.baseline_converters = {
             # make sure provider is set
             self.mapping["ProviderName"]: lambda x: self.provider
             if not x
             else x,
         }
         if cost_multiplier:
             # apply given cost multiplier
-            baseline_converters[self.mapping["EffectiveCost"]] = (
+            self.baseline_converters[self.mapping["EffectiveCost"]] = (
                 lambda x: pd.to_numeric(x) * cost_multiplier
             )
 
+    def load_and_convert_data(self):
+        """
+        Load in the billing data and apply any specified modifications
+        """
         self.billing_content = (
             self.source
             if isinstance(self.source, pd.DataFrame)
             else pd.read_csv(
                 self.source,
-                sep=separator,
+                sep=self.separator,
                 engine="python",
-                skiprows=skip_rows,
+                skiprows=self.skip_rows,
                 # any converters specified by the user will override built-in ones
-                converters={**baseline_converters, **converters},
+                converters={**self.baseline_converters, **self.converters},
             )
         )
 
-        for field, value in self.additional_columns.items():
-            self.billing_content[field] = value
-
-    def render(self) -> pd.DataFrame:
+    def convert_fields(self) -> pd.DataFrame:
         """
-        Create the Harness-aligned FOCUS CSV
+        Convert the billing data to a format that is compatible with Harness
         """
 
+        if self.billing_content is None:
+            self.load_and_convert_data()
+
         self.harness_focus_content = pd.DataFrame()
         for focus_field, source_field in self.mapping.items():
             if source_field in self.billing_content.columns:
@@ -165,17 +172,22 @@ def render(self) -> pd.DataFrame:
                 # Default value for missing columns
                 self.harness_focus_content[focus_field] = source_field
 
+        for field, value in self.additional_columns.items():
+            self.harness_focus_content[field] = value
+
         return self.harness_focus_content
 
     def render_file(self, filename: str):
         """
         Save the Harness-CSV to a file
         """
 
+        if self.billing_content is None:
+            self.load_and_convert_data()
         if self.harness_focus_content is None:
-            self.render().to_csv(filename, index=False)
-        else:
-            self.harness_focus_content.to_csv(filename, index=False)
+            self.convert_fields()
+
+        self.harness_focus_content.to_csv(filename, index=False)
 
     def _list_providers(self):
         """
@@ -383,7 +395,7 @@ def _get_invoice_period(self) -> str:
             start_date_str = self.harness_focus_content["BillingPeriodStart"].iloc[0]
 
             # Parse the dates
-            start_date = datetime.strptime(start_date_str, "%Y-%m-%d %H:%M:%S")
+            start_date = datetime.strptime(start_date_str, "%Y-%m-%dT%H:%M:%S")
 
             # Calculate the first day of the month for the start date
             period_start = start_date.replace(day=1)
@@ -425,7 +437,7 @@ def _trigger_ingestion(self, provider_id: str, invoice_periods: list) -> bool:
 
     def upload(
         self, harness_platform_api_key: str = None, harness_account_id: str = None
-    ):
+    ) -> str | None:
         """
         Upload the Harness-CSV data to Harness
 
@@ -434,7 +446,7 @@ def upload(
             harness_account_id (str): Account ID for Harness
 
         Returns:
-            bool: True if all steps completed successfully, False otherwise
+            str | None: Object name if all steps completed successfully, None otherwise
         """
 
         if harness_platform_api_key:
@@ -445,27 +457,27 @@ def upload(
 
         # Ensure we have the rendered content
         if self.harness_focus_content is None:
-            self.render()
+            self.convert_fields()
 
         csv_content = self.harness_focus_content.to_csv(index=False)
         md5_hash = self._get_md5_hash(csv_content)
 
         # Ensure we have a provider
         if self.provider_uuid is None:
             if not self._create_provider():
-                return False
+                return None
 
         # If no invoice_period is provided, calculate it from the data
         this_invoice_period = self._get_invoice_period()
         if not this_invoice_period:
             print("Failed to determine invoice period from data")
-            return False
+            return None
 
         # Check if file has already been uploaded
         for file in self.list_files():
             if file["md5"] == md5_hash:
                 print(f"File already uploaded: {md5_hash}")
-                return False
+                return None
 
         # Generate a unique object name
         timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
@@ -479,12 +491,12 @@ def upload(
         )
         if not signed_url:
             print("Failed to get signed URL")
-            return False
+            return None
 
         # Step 2: Upload to GCS
         if not self._upload_to_gcs(signed_url, csv_content):
             print("Failed to upload to GCS")
-            return False
+            return None
 
         # Extract the GCS URL from the signed URL (remove query parameters)
         cloud_storage_path = signed_url.split("?")[0]
@@ -499,14 +511,14 @@ def upload(
             cloud_storage_path,
         ):
             print("Failed to mark upload as complete")
-            return False
+            return None
 
         # Step 4: Trigger ingestion
         if not self._trigger_ingestion(self.provider_uuid, [this_invoice_period]):
             print("Failed to trigger ingestion")
-            return False
+            return None
 
-        return True
+        return object_name
 
     def create_dataset(data: list(list()) = None) -> pd.DataFrame:
         """
-Original file line number
+Diff line change
@@ Expand Up / @@ -198,4 +198,6 @@ cython_debug/ @@
     # terraform modules
     *terraform*
-    builds
+    builds
+    test.py
Original file line number	Diff line number	Diff line change
		@@ -1 +1,2 @@
		from .focus_data import Focus, HARNESS_FIELDS
		from .mongodb_atlas import MongoDBAtlas