Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -198,4 +198,6 @@ cython_debug/

# terraform modules
*terraform*
builds
builds

test.py
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -199,3 +199,22 @@ pull the example focus csv: `curl -LO https://raw.githubusercontent.com/FinOps-O
install [poetry](https://python-poetry.org/docs/#installation)

testing: `make test`

## built-in focus conversions

users may contribute a platform-specific subclass of the `Focus` class to handle special cases in their billing exports.

### mongodb atlas

```python
from os import getenv

from harness_ccm_external_data import MongoDBAtlas

atlas = MongoDBAtlas(
"MongoDB Atlas",
"My Company Inc.",
"usage-summary-8765434567887656789-20250201.csv",
harness_account_id=getenv("HARNESS_ACCOUNT_ID"),
harness_platform_api_key=getenv("HARNESS_PLATFORM_API_KEY"),
)
atlas.upload()
```
67 changes: 64 additions & 3 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
[project]
name = "harness-ccm-external-data"
version = "0.1.6"
version = "0.2.0-rc.1"
description = "Tools to help manage external data for Harness CCM"
authors = [
{name = "Riley Snyder",email = "riley.snyder@harness.io"}
]
readme = "README.md"
requires-python = ">=3.8"
requires-python = ">=3.9"
dependencies = [
"pandas (>=2.3.0,<3.0.0)",
"requests (>=2.32.4,<3.0.0)"
Expand Down
1 change: 1 addition & 0 deletions src/harness_ccm_external_data/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .focus_data import Focus, HARNESS_FIELDS
from .mongodb_atlas import MongoDBAtlas
64 changes: 38 additions & 26 deletions src/harness_ccm_external_data/focus_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,12 @@ def __init__(
):
self.provider = provider # cloud platform
self.data_source = data_source # instance of this cloud platform
self.source = source
self.provider_type = provider_type
self.invoice_period = invoice_period
self.provider_uuid = provider_uuid
self.source = source
self.separator = separator
self.skip_rows = skip_rows
self.cost_multiplier = cost_multiplier
self.converters = converters
self.additional_columns = {}
Expand All @@ -90,6 +92,7 @@ def __init__(
self.harness_account_id = harness_account_id

# Stub for generated content
self.billing_content: pd.DataFrame = None
self.harness_focus_content: pd.DataFrame = None

# sanitize mappings
Expand Down Expand Up @@ -122,39 +125,43 @@ def __init__(
)
del self.mapping[field]

baseline_converters = {
self.baseline_converters = {
# make sure provider is set
self.mapping["ProviderName"]: lambda x: self.provider
if not x
else x,
}
if cost_multiplier:
# apply given cost multiplier
baseline_converters[self.mapping["EffectiveCost"]] = (
self.baseline_converters[self.mapping["EffectiveCost"]] = (
lambda x: pd.to_numeric(x) * cost_multiplier
)

def load_and_convert_data(self):
"""
Load in the billing data and apply any specified modifications
"""
self.billing_content = (
self.source
if isinstance(self.source, pd.DataFrame)
else pd.read_csv(
self.source,
sep=separator,
sep=self.separator,
engine="python",
skiprows=skip_rows,
skiprows=self.skip_rows,
# any converters specified by the user will override built-in ones
converters={**baseline_converters, **converters},
converters={**self.baseline_converters, **self.converters},
)
)

for field, value in self.additional_columns.items():
self.billing_content[field] = value

def render(self) -> pd.DataFrame:
def convert_fields(self) -> pd.DataFrame:
"""
Create the Harness-aligned FOCUS CSV
Convert the billing data to a format that is compatible with Harness
"""

if self.billing_content is None:
self.load_and_convert_data()

self.harness_focus_content = pd.DataFrame()
for focus_field, source_field in self.mapping.items():
if source_field in self.billing_content.columns:
Expand All @@ -165,17 +172,22 @@ def render(self) -> pd.DataFrame:
# Default value for missing columns
self.harness_focus_content[focus_field] = source_field

for field, value in self.additional_columns.items():
self.harness_focus_content[field] = value

return self.harness_focus_content

def render_file(self, filename: str):
"""
Save the Harness-CSV to a file
"""

if self.billing_content is None:
self.load_and_convert_data()
if self.harness_focus_content is None:
self.render().to_csv(filename, index=False)
else:
self.harness_focus_content.to_csv(filename, index=False)
self.convert_fields()

self.harness_focus_content.to_csv(filename, index=False)

def _list_providers(self):
"""
Expand Down Expand Up @@ -383,7 +395,7 @@ def _get_invoice_period(self) -> str:
start_date_str = self.harness_focus_content["BillingPeriodStart"].iloc[0]

# Parse the dates
start_date = datetime.strptime(start_date_str, "%Y-%m-%d %H:%M:%S")
start_date = datetime.strptime(start_date_str, "%Y-%m-%dT%H:%M:%S")

# Calculate the first day of the month for the start date
period_start = start_date.replace(day=1)
Expand Down Expand Up @@ -425,7 +437,7 @@ def _trigger_ingestion(self, provider_id: str, invoice_periods: list) -> bool:

def upload(
self, harness_platform_api_key: str = None, harness_account_id: str = None
):
) -> str | None:
"""
Upload the Harness-CSV data to Harness

Expand All @@ -434,7 +446,7 @@ def upload(
harness_account_id (str): Account ID for Harness

Returns:
bool: True if all steps completed successfully, False otherwise
str | None: Object name if all steps completed successfully, None otherwise
"""

if harness_platform_api_key:
Expand All @@ -445,27 +457,27 @@ def upload(

# Ensure we have the rendered content
if self.harness_focus_content is None:
self.render()
self.convert_fields()

csv_content = self.harness_focus_content.to_csv(index=False)
md5_hash = self._get_md5_hash(csv_content)

# Ensure we have a provider
if self.provider_uuid is None:
if not self._create_provider():
return False
return None

# If no invoice_period is provided, calculate it from the data
this_invoice_period = self._get_invoice_period()
if not this_invoice_period:
print("Failed to determine invoice period from data")
return False
return None

# Check if file has already been uploaded
for file in self.list_files():
if file["md5"] == md5_hash:
print(f"File already uploaded: {md5_hash}")
return False
return None

# Generate a unique object name
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
Expand All @@ -479,12 +491,12 @@ def upload(
)
if not signed_url:
print("Failed to get signed URL")
return False
return None

# Step 2: Upload to GCS
if not self._upload_to_gcs(signed_url, csv_content):
print("Failed to upload to GCS")
return False
return None

# Extract the GCS URL from the signed URL (remove query parameters)
cloud_storage_path = signed_url.split("?")[0]
Expand All @@ -499,14 +511,14 @@ def upload(
cloud_storage_path,
):
print("Failed to mark upload as complete")
return False
return None

# Step 4: Trigger ingestion
if not self._trigger_ingestion(self.provider_uuid, [this_invoice_period]):
print("Failed to trigger ingestion")
return False
return None

return True
return object_name

def create_dataset(data: list(list()) = None) -> pd.DataFrame:
"""
Expand Down
Loading