-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathschemas.json
More file actions
406 lines (406 loc) · 81.1 KB
/
schemas.json
File metadata and controls
406 lines (406 loc) · 81.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
{
"schemas": [
{
"p_id": "1",
"org_id": "1775167038",
"org_name": "Banking",
"schema_prompt": "Generate realistic synthetic US banking data with strict referential integrity across all tables and valid primary/foreign keys. \nFollow all DDL constraints exactly (data types, nullability, lengths, uniqueness) and keep dates/timestamps logically consistent with no future dates. \nMaintain business consistency: balances and amounts plausible, `OUTSTANDING_AMOUNT <= PRINCIPAL_AMOUNT`, and `PAYMENT_AMOUNT = principal + interest + penalty`. \nUse realistic distributions (account/customer statuses, transaction types/channels), mostly USD, and mark about 2% transactions as suspicious. \nCreate diverse, non-duplicate, test-ready records with coherent customer-account-loan-payment relationships.",
"schema_list": [
{
"table_id": "1666d0c5-604d-4e29-9014-3f4aac1ec3bd",
"table_name": "CUSTOMERS",
"num_entries": 12000,
"ddl": "CREATE TABLE CUSTOMERS (\n CUSTOMER_ID BIGINT PRIMARY KEY,\n FIRST_NAME VARCHAR(50) NOT NULL,\n LAST_NAME VARCHAR(50) NOT NULL,\n DATE_OF_BIRTH DATE NOT NULL,\n EMAIL VARCHAR(120) UNIQUE NOT NULL,\n PHONE VARCHAR(20),\n STREET_ADDRESS VARCHAR(120),\n CITY VARCHAR(60),\n STATE_CODE CHAR(2),\n POSTAL_CODE VARCHAR(10),\n KYC_STATUS VARCHAR(20) NOT NULL,\n RISK_SCORE DECIMAL(5,2),\n CREATED_AT TIMESTAMP NOT NULL,\n UPDATED_AT TIMESTAMP\n);",
"instructions": "Generate adult customers (age 18-90), realistic US names/addresses, unique email.\nKYC_STATUS in {VERIFIED,PENDING,REJECTED} with 80/15/5 distribution.\nRISK_SCORE between 0 and 100 with 2 decimals.\n",
"columns_list": []
},
{
"table_id": "d3a353ce-4988-4ba0-b8f3-6d6a01919684",
"table_name": "ACCOUNTS",
"num_entries": 13000,
"ddl": "CREATE TABLE ACCOUNTS (\n ACCOUNT_ID BIGINT PRIMARY KEY,\n CUSTOMER_ID BIGINT NOT NULL,\n ACCOUNT_NUMBER VARCHAR(20) UNIQUE NOT NULL,\n ACCOUNT_TYPE VARCHAR(20) NOT NULL,\n CURRENCY_CODE CHAR(3) NOT NULL,\n BALANCE DECIMAL(18,2) NOT NULL,\n AVAILABLE_BALANCE DECIMAL(18,2) NOT NULL,\n STATUS VARCHAR(20) NOT NULL,\n OPENED_DATE DATE NOT NULL,\n CLOSED_DATE DATE,\n BRANCH_CODE VARCHAR(20),\n OVERDRAFT_LIMIT DECIMAL(18,2),\n INTEREST_RATE DECIMAL(5,3),\n CREATED_AT TIMESTAMP NOT NULL,\n UPDATED_AT TIMESTAMP,\n FOREIGN KEY (CUSTOMER_ID) REFERENCES CUSTOMERS(CUSTOMER_ID)\n);",
"instructions": "Every CUSTOMER_ID must exist in CUSTOMERS.\nACCOUNT_TYPE in {CHECKING,SAVINGS} with 70/30 split.\nSTATUS in {ACTIVE,DORMANT,CLOSED} with 85/10/5 split.\nAVAILABLE_BALANCE <= BALANCE + OVERDRAFT_LIMIT.\n",
"columns_list": []
},
{
"table_id": "1ec2a691-485d-4273-acfa-bd1e7439e6b4",
"table_name": "TRANSACTIONS",
"num_entries": 15000,
"ddl": "CREATE TABLE TRANSACTIONS (\n TRANSACTION_ID BIGINT PRIMARY KEY,\n ACCOUNT_ID BIGINT NOT NULL,\n TRANSACTION_TS TIMESTAMP NOT NULL,\n VALUE_DATE DATE NOT NULL,\n TRANSACTION_TYPE VARCHAR(20) NOT NULL,\n CHANNEL VARCHAR(20) NOT NULL,\n AMOUNT DECIMAL(18,2) NOT NULL,\n CURRENCY_CODE CHAR(3) NOT NULL,\n DESCRIPTION VARCHAR(200),\n MERCHANT_NAME VARCHAR(120),\n MERCHANT_CATEGORY VARCHAR(60),\n COUNTERPARTY_ACCOUNT VARCHAR(34),\n STATUS VARCHAR(20) NOT NULL,\n IS_SUSPICIOUS BOOLEAN NOT NULL,\n REFERENCE_CODE VARCHAR(40) UNIQUE,\n FOREIGN KEY (ACCOUNT_ID) REFERENCES ACCOUNTS(ACCOUNT_ID)\n);",
"instructions": "Every ACCOUNT_ID must exist in ACCOUNTS.\nTRANSACTION_TYPE in {DEBIT,CREDIT,TRANSFER,FEE,INTEREST}.\nAMOUNT > 0 with right-skew (many small, few large).\n2% rows IS_SUSPICIOUS = true.\nVALUE_DATE should match or be 0-2 days after TRANSACTION_TS date.\n",
"columns_list": []
},
{
"table_id": "b585081b-c917-4638-89e4-f694171314d3",
"table_name": "LOANS",
"num_entries": 10500,
"ddl": "CREATE TABLE LOANS (\n LOAN_ID BIGINT PRIMARY KEY,\n CUSTOMER_ID BIGINT NOT NULL,\n ACCOUNT_ID BIGINT,\n LOAN_TYPE VARCHAR(30) NOT NULL,\n PRINCIPAL_AMOUNT DECIMAL(18,2) NOT NULL,\n OUTSTANDING_AMOUNT DECIMAL(18,2) NOT NULL,\n INTEREST_RATE DECIMAL(5,3) NOT NULL,\n TERM_MONTHS INT NOT NULL,\n START_DATE DATE NOT NULL,\n MATURITY_DATE DATE NOT NULL,\n STATUS VARCHAR(20) NOT NULL,\n COLLATERAL_TYPE VARCHAR(40),\n CREDIT_SCORE INT,\n CREATED_AT TIMESTAMP NOT NULL,\n UPDATED_AT TIMESTAMP,\n FOREIGN KEY (CUSTOMER_ID) REFERENCES CUSTOMERS(CUSTOMER_ID),\n FOREIGN KEY (ACCOUNT_ID) REFERENCES ACCOUNTS(ACCOUNT_ID)\n);",
"instructions": "Every CUSTOMER_ID must exist in CUSTOMERS.\nIf ACCOUNT_ID is present, it must exist in ACCOUNTS and belong to the same CUSTOMER_ID.\nLOAN_TYPE in {PERSONAL,HOME,AUTO,SME}.\nOUTSTANDING_AMOUNT between 0 and PRINCIPAL_AMOUNT.\nMATURITY_DATE must equal START_DATE plus TERM_MONTHS months.\n",
"columns_list": []
},
{
"table_id": "88fbd741-d0f7-4754-8b0d-660d3259d8bb",
"table_name": "LOAN_PAYMENTS",
"num_entries": 11000,
"ddl": "CREATE TABLE LOAN_PAYMENTS (\n PAYMENT_ID BIGINT PRIMARY KEY,\n LOAN_ID BIGINT NOT NULL,\n ACCOUNT_ID BIGINT NOT NULL,\n PAYMENT_TS TIMESTAMP NOT NULL,\n DUE_DATE DATE NOT NULL,\n PAID_DATE DATE,\n PAYMENT_AMOUNT DECIMAL(18,2) NOT NULL,\n PRINCIPAL_COMPONENT DECIMAL(18,2) NOT NULL,\n INTEREST_COMPONENT DECIMAL(18,2) NOT NULL,\n PENALTY_COMPONENT DECIMAL(18,2) NOT NULL,\n PAYMENT_METHOD VARCHAR(20) NOT NULL,\n PAYMENT_STATUS VARCHAR(20) NOT NULL,\n RECEIPT_NUMBER VARCHAR(40) UNIQUE,\n CREATED_AT TIMESTAMP NOT NULL,\n FOREIGN KEY (LOAN_ID) REFERENCES LOANS(LOAN_ID),\n FOREIGN KEY (ACCOUNT_ID) REFERENCES ACCOUNTS(ACCOUNT_ID)\n);",
"instructions": "Every LOAN_ID must exist in LOANS; ACCOUNT_ID must exist in ACCOUNTS.\nPAYMENT_AMOUNT = PRINCIPAL_COMPONENT + INTEREST_COMPONENT + PENALTY_COMPONENT.\nPAYMENT_STATUS in {PAID,PENDING,FAILED,LATE}.\nIf status=PAID then PAID_DATE is not null; otherwise PAID_DATE may be null.\n",
"columns_list": []
}
],
"schema_gen_status": "DONE",
"schema_gen_last_update": "2026-04-15 17:22:19",
"schema_gen_log": "Files saved locally.",
"dg_code_gen_status": "DONE",
"dg_code_gen_at": "2026-04-15 17:21:50",
"dg_code_gen_log": "Code generated successfully.",
"dg_bulkdata_gen_status": "DONE",
"dg_bulkdata_gen_at": "2026-04-15 17:22:18",
"dg_bulkdata_gen_log": "CSV generation completed.",
"dg_sf_upload_status": "DONE",
"dg_sf_upload_at": "2026-04-15 17:22:19",
"dg_sf_upload_log": "Validation for schema 'Banking': 26/26 checks passed. Relationships=6, distributions=3, conditionals=0, profiles=0.",
"last_run_log_path": "C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Banking\\logs\\latest.log",
"last_error_trace": "Generated data script failed. File: C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Banking\\code_generated\\generated_code.py. Details: Traceback (most recent call last):\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Banking\\code_generated\\generated_code.py\", line 259, in <module>\n main()\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Banking\\code_generated\\generated_code.py\", line 256, in main\n generate_loan_payments(loans, accounts)\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Banking\\code_generated\\generated_code.py\", line 209, in generate_loan_payments\n principal_component = round(random.uniform(0, loan['OUTSTANDING_AMOUNT']), 2)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"C:\\Users\\aakas\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\random.py\", line 522, in uniform\n return a + (b - a) * self.random()\n ~~^~~\nTypeError: unsupported operand type(s) for -: 'str' and 'int'\nTraceback (most recent call last):\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\data_generation_backend.py\", line 1437, in generate_schema_data\n run = subprocess.run(\n ^^^^^^^^^^^^^^^\n File \"C:\\Users\\aakas\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py\", line 571, in run\n raise CalledProcessError(retcode, process.args,\nsubprocess.CalledProcessError: Command '['C:\\\\Users\\\\aakas\\\\Desktop\\\\DLS\\\\Datagenx\\\\datagenx\\\\.venv\\\\Scripts\\\\python.exe', 'C:\\\\Users\\\\aakas\\\\Desktop\\\\DLS\\\\Datagenx\\\\datagenx\\\\schemas\\\\Banking\\\\code_generated\\\\generated_code.py']' returned non-zero exit status 1.\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\data_generation_backend.py\", line 1503, in generate_schema_data\n raise RuntimeError(\nRuntimeError: Generated data script failed. 
File: C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Banking\\code_generated\\generated_code.py. Details: Traceback (most recent call last):\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Banking\\code_generated\\generated_code.py\", line 259, in <module>\n main()\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Banking\\code_generated\\generated_code.py\", line 256, in main\n generate_loan_payments(loans, accounts)\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Banking\\code_generated\\generated_code.py\", line 209, in generate_loan_payments\n principal_component = round(random.uniform(0, loan['OUTSTANDING_AMOUNT']), 2)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"C:\\Users\\aakas\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\random.py\", line 522, in uniform\n return a + (b - a) * self.random()\n ~~^~~\nTypeError: unsupported operand type(s) for -: 'str' and 'int'\n"
},
{
"p_id": "1",
"org_id": "1775329785",
"org_name": "Insurance",
"schema_prompt": "Generate schema according to instructions.",
"schema_list": [
{
"table_id": "11a554ed-307f-4e20-92b3-220075e6d07a",
"table_name": "INSU_CUSTOMER",
"num_entries": 6500,
"ddl": "CREATE TABLE INSU_CUSTOMER (\n CUSTOMER_ID STRING PRIMARY KEY,\n CUSTOMER_TYPE STRING,\n FIRST_NAME STRING,\n LAST_NAME STRING,\n GENDER STRING,\n BUSINESS_NAME STRING,\n DOB DATE,\n EMAIL STRING,\n PHONE STRING,\n COUNTRY STRING,\n STATE STRING,\n CITY STRING,\n ADDRESS_LINE1 STRING,\n ADDRESS_LINE2 STRING,\n POSTAL_CODE STRING,\n REGISTRATION_DATE DATE,\n CREATED_AT TIMESTAMP_NTZ\n);",
"instructions": "Purpose: Master records for insurance customers (individuals or businesses).\n customer_id: generate UUID string; must be unique.\n customer_type: choose from ['INDIVIDUAL','BUSINESS'] with distribution ~85/15.\n gender: if customer_type='INDIVIDUAL', choose from ['MALE','FEMALE','OTHER'] with distribution ~49/49/2; if 'BUSINESS', NULL.\n first_name/last_name: use faker names only when customer_type='INDIVIDUAL'; else 'NOT APPLICABLE'.\n business_name: faker.company() only when customer_type='BUSINESS'; else 'NOT APPLICABLE'.\n dob: if customer_type='INDIVIDUAL', random date between 1960-01-01 and 2005-12-31 (adults); if 'BUSINESS', NULL.\n email: faker.email(); ~100% populated; prefer global uniqueness.\n phone: faker.phone_number(); ~100% populated.\n country: pick from ['US','IN','AE','UK','CA'] with distribution ~50/20/10/10/10; state/city/postal coherent to country.\n address_line1: faker.street_address().\n address_line2: optional secondary address (apartment/suite) or NULL.\n registration_date: date between 2015-01-01 and current_date.\n created_at: datetime between registration_date and now (ensure created_at >= registration_date).",
"columns_list": []
},
{
"table_id": "c6530ebe-9922-4d68-8971-b21444b50207",
"table_name": "INSU_AGENT",
"num_entries": 500,
"ddl": "CREATE TABLE INSU_AGENT (\n AGENT_ID STRING PRIMARY KEY,\n AGENT_CODE STRING,\n FIRST_NAME STRING,\n LAST_NAME STRING,\n EMAIL STRING,\n PHONE STRING,\n LICENSE_NO STRING,\n COUNTRY STRING,\n STATE STRING,\n CITY STRING,\n STATUS STRING,\n HIRE_DATE DATE,\n CREATED_AT TIMESTAMP_NTZ\n);",
"instructions": "Purpose: Master records for licensed agents/brokers.\n agent_id: UUID; unique.\n agent_code: string like 'AGT' + 7\u20139 digits; must be unique.\n first_name/last_name: faker names.\n email: faker.email(); ~100% populated; unique-ish.\n phone: faker.phone_number(); ~100% populated.\n license_no: string like 'LIC' + 8\u201310 digits; ~98% unique.\n country/state/city: coherent geography; bias toward ['US','IN','UK'].\n status: choose from ['ACTIVE','INACTIVE'] with ~85/15.\n hire_date: between 2010-01-01 and today.\n created_at: datetime between hire_date and now.",
"columns_list": []
},
{
"table_id": "a2b1f503-605a-44b1-9e34-f895e21b7618",
"table_name": "INSU_POLICY",
"num_entries": 10000,
"ddl": "CREATE TABLE DATAGEN.INSURANCE.INSU_POLICY (\n\tPOLICY_ID STRING PRIMARY KEY,\n\tPOLICY_NUMBER STRING,\n\tCUSTOMER_ID STRING,\n\tPRODUCT_ID STRING,\n\tAGENT_ID STRING,\n\tSTATUS STRING,\n\tEFFECTIVE_DATE DATE,\n\tEXPIRATION_DATE DATE,\n\tPREMIUM_BILLING_FREQ STRING,\n\tPREMIUM_AMOUNT NUMBER(14,2),\n\tCURRENCY STRING,\n\tCREATED_AT TIMESTAMP_NTZ,\n\tforeign key (CUSTOMER_ID) references INSU_CUSTOMER(CUSTOMER_ID),\n\tforeign key (PRODUCT_ID) references INSU_PRODUCT(PRODUCT_ID),\n\tforeign key (AGENT_ID) references INSU_AGENT(AGENT_ID)\n);",
"instructions": "POLICY_ID \u2192 Generate uuid4(); must be unique.\nPOLICY_NUMBER \u2192 Unique string like PL{YYYY}{8\u201310 digits} (e.g., PL2023001829374).\nCUSTOMER_ID \u2192 Pick an existing INSU_CUSTOMER.CUSTOMER_ID (NOT NULL).\nPRODUCT_ID \u2192 Pick an existing INSU_PRODUCT.PRODUCT_ID, enforcing policy_type mix Auto/Home/Life/Health = 40%/25%/20%/15% via the product.\nAGENT_ID \u2192 Pick an existing INSU_AGENT.AGENT_ID ~90% of the time, else NULL (direct/online sale).\nSTATUS \u2192 One of ['ACTIVE','LAPSED','CANCELED','EXPIRED'] with ~70% 'ACTIVE' (rest distributed).\nEFFECTIVE_DATE \u2192 Random date between 2016-01-01 and today.\nEXPIRATION_DATE \u2192 EFFECTIVE_DATE + 1 year \u00b1 60 days (ensure >= EFFECTIVE_DATE).\nPREMIUM_BILLING_FREQ \u2192 One of ['MONTHLY','QUARTERLY','ANNUAL'] with ~70% 'MONTHLY' (20% 'QUARTERLY', 10% 'ANNUAL').\nPREMIUM_AMOUNT \u2192 Annual premium > 0; suggest ranges by policy_type (Auto 300\u20132000, Home 500\u20133000, Life 200\u20135000, Health 200\u20134000) with \u00b115% noise; round to 2 decimals.\nCURRENCY \u2192 Choose from ['USD','INR','AED','GBP','CAD'], default from customer country 80\u201390% of the time (10\u201320% cross-currency noise).\nCREATED_AT \u2192 Timestamp between EFFECTIVE_DATE and LEAST(EXPIRATION_DATE, NOW()).",
"columns_list": []
},
{
"table_id": "f7a5b31a-c3ae-45be-9393-c6a89043efa0",
"table_name": "INSU_RISK_OBJECT",
"num_entries": 16500,
"ddl": "CREATE TABLE INSU_RISK_OBJECT (\n OBJECT_ID STRING PRIMARY KEY,\n POLICY_ID STRING,\n OBJECT_TYPE STRING,\n DESCRIPTION STRING,\n YEAR_MADE NUMBER(4,0),\n SERIAL_NO STRING,\n ADDRESS_LINE1 STRING,\n ADDRESS_LINE2 STRING,\n CITY STRING,\n STATE STRING,\n POSTAL_CODE STRING,\n COUNTRY STRING,\n SUM_INSURED NUMBER(14,2),\n CREATED_AT TIMESTAMP_NTZ,\n FOREIGN KEY (POLICY_ID) REFERENCES INSU_POLICY(POLICY_ID)\n);",
"instructions": "Purpose: Insured risk objects linked to policies (vehicles, properties, persons, equipment).\n object_id: UUID; unique.\n policy_id: FK to INSU_POLICY.policy_id; ~1\u20132 risk objects per policy on average to reach target rows.\n object_type: choose from ['VEHICLE','PROPERTY','PERSON','EQUIPMENT'] with ~50/35/10/5.\n description: faker.sentence(4\u201310 words).\n year_made: for VEHICLE/EQUIPMENT, integer between 1980 and 2025.\n serial_no: ~100% populated for VEHICLE/PROPERTY.\n address_line1/address_line2/city/state/postal_code/country: mostly for PROPERTY (~80% filled when object_type='PROPERTY'); for other types, fill ~20\u201330% only.\n sum_insured: if available, pick between linked product.coverage_min and product.coverage_max; otherwise 20000\u20132000000.\n created_at: datetime between policy.effective_date and min(policy.expiration_date, now).",
"columns_list": []
},
{
"table_id": "945453c0-e672-47be-b9ed-1ab19c618a23",
"table_name": "INSU_CLAIM",
"num_entries": 3500,
"ddl": "CREATE TABLE INSU_CLAIM (\n CLAIM_ID STRING PRIMARY KEY,\n CLAIM_NUMBER STRING,\n POLICY_ID STRING,\n CUSTOMER_ID STRING,\n CLAIM_TYPE STRING,\n INCIDENT_DATE DATE,\n REPORTED_DATE DATE,\n STATUS STRING,\n LOSS_ESTIMATE NUMBER(14,2),\n DEDUCTIBLE NUMBER(14,2),\n APPROVED_AMOUNT NUMBER(14,2),\n CLOSED_DATE DATE,\n CREATED_AT TIMESTAMP_NTZ,\n FOREIGN KEY (POLICY_ID) REFERENCES INSU_POLICY(POLICY_ID),\n FOREIGN KEY (CUSTOMER_ID) REFERENCES INSU_CUSTOMER(CUSTOMER_ID)\n);",
"instructions": "Purpose: Claims raised against policies.\n claim_id: UUID; unique.\n claim_number: string like 'CL' + 9\u201312 digits; must be unique.\n policy_id: FK to INSU_POLICY.policy_id.\n customer_id: FK to INSU_CUSTOMER.customer_id (should match policy.customer_id).\n claim_type: choose from ['ACCIDENT','THEFT','FIRE','MEDICAL','NATURAL DISASTER','LIABILITY','OTHERS'] with ~45/10/8/15/7/10/5.\n incident_date: date between policy.effective_date and min(policy.expiration_date, today).\n reported_date: incident_date + 0\u201330 days (>= incident_date).\n status: choose from ['OPEN','IN-REVIEW','APPROVED','REJECTED','CLOSED','WITHDRAWN'] with ~25/20/25/10/18/2.\n loss_estimate: 500\u2013500000 with long-tail; ensure >= 0.\n deductible: 0\u20135000; often <= product.deductible_max if accessible.\n approved_amount: if status in ['APPROVED','CLOSED'] then 50%\u2013100% of loss_estimate; if 'REJECTED' then 0; otherwise NULL.\n closed_date: only when status='CLOSED'; pick between reported_date and reported_date + 365 days; else NULL.\n created_at: current datetime (now).",
"columns_list": []
},
{
"table_id": "49d8b5a6-118d-401c-836f-d0564ea00d17",
"table_name": "INSU_PAYMENT",
"num_entries": 13500,
"ddl": "CREATE TABLE INSU_PAYMENT (\n PAYMENT_ID STRING PRIMARY KEY,\n POLICY_ID STRING,\n CLAIM_ID STRING,\n DIRECTION STRING,\n STATUS STRING,\n METHOD STRING NOT NULL,\n AMOUNT NUMBER(14,2),\n CURRENCY STRING,\n PAYMENT_DATE DATE,\n TRANSACTION_REF STRING,\n CREATED_AT TIMESTAMP_NTZ,\n FOREIGN KEY (POLICY_ID) REFERENCES INSU_POLICY(POLICY_ID),\n FOREIGN KEY (CLAIM_ID) REFERENCES INSU_CLAIM(CLAIM_ID)\n);",
"instructions": "Purpose: Monetary transactions for policies and claims (premiums, payouts, refunds).\n payment_id: UUID; unique.\n policy_id: FK to INSU_POLICY.policy_id.\n claim_id: optional FK to INSU_CLAIM.claim_id; present for payouts (~30%), NULL for premium payments (~65%) and policy refunds (~5%).\n direction: choose from ['Premium','Payout','Refund'] with ~65/30/5 (must align with claim_id presence).\n status: choose from ['Completed','Pending','Failed'] with ~90/7/3.\n method: choose from ['ACH','Card','Check','Wire','Cash','UPI'] with ~35/30/10/15/5/5.\n amount: > 0; random amount within \u00b120% of policy.premium_amount.\n currency: pick based on policy/customer country mapping ['USD','INR','AED','GBP','CAD'].\n payment_date: if claim_id present, between claim.reported_date and (claim.closed_date or claim.reported_date + 60 days); else within [policy.effective_date .. policy.expiration_date].\n transaction_ref: string like 'TX' + 10\u201314 digits; mostly unique.\n created_at: datetime between payment_date and now.",
"columns_list": []
}
],
"schema_gen_status": "ERROR",
"schema_gen_last_update": "2026-04-15 15:55:56",
"schema_gen_log": "Generated data script failed. File: C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Insurance\\code_generated\\generated_code.py. Details: Traceback (most recent call last):\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Insurance\\code_generated\\generated_code.py\", line 337, in <module>\n generate_policy_data(customers, agents, products)\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Insurance\\code_generated\\generated_code.py\", line 183, in generate_policy_data\n | Log: C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Insurance\\logs\\latest.log",
"dg_code_gen_status": "DONE",
"dg_code_gen_at": "2026-04-15 15:55:54",
"dg_code_gen_log": "Code generated successfully.",
"dg_bulkdata_gen_status": "ERROR",
"dg_bulkdata_gen_at": "2026-04-15 15:55:54",
"dg_bulkdata_gen_log": "Running generated script.",
"dg_sf_upload_status": "NEW",
"dg_sf_upload_at": "2026-04-08 17:15:24",
"dg_sf_upload_log": "Generated data script failed. File: C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Insurance\\code_generated\\generated_code.py. Details: Traceback (most recent call last):\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Insurance\\code_generated\\generated_code.py\", line 337, in <module>\n generate_policy_data(customers, agents, products)\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Insurance\\code_generated\\generated_code.py\", line 183, in generate_policy_data\n | Log: C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Insurance\\logs\\latest.log",
"last_run_log_path": "C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Insurance\\logs\\latest.log",
"last_error_trace": "Generated data script failed. File: C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Insurance\\code_generated\\generated_code.py. Details: Traceback (most recent call last):\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Insurance\\code_generated\\generated_code.py\", line 337, in <module>\n generate_policy_data(customers, agents, products)\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Insurance\\code_generated\\generated_code.py\", line 183, in generate_policy_data\n customer = random.choice(customers)\n ^^^^^^^^^^^^^^^^^^^^^^^^\n File \"C:\\Users\\aakas\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\random.py\", line 373, in choice\n raise IndexError('Cannot choose from an empty sequence')\nIndexError: Cannot choose from an empty sequence\nTraceback (most recent call last):\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\data_generation_backend.py\", line 1437, in generate_schema_data\n run = subprocess.run(\n ^^^^^^^^^^^^^^^\n File \"C:\\Users\\aakas\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py\", line 571, in run\n raise CalledProcessError(retcode, process.args,\nsubprocess.CalledProcessError: Command '['C:\\\\Users\\\\aakas\\\\Desktop\\\\DLS\\\\Datagenx\\\\datagenx\\\\.venv\\\\Scripts\\\\python.exe', 'C:\\\\Users\\\\aakas\\\\Desktop\\\\DLS\\\\Datagenx\\\\datagenx\\\\schemas\\\\Insurance\\\\code_generated\\\\generated_code.py']' returned non-zero exit status 1.\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\data_generation_backend.py\", line 1503, in generate_schema_data\n raise RuntimeError(\nRuntimeError: Generated data script failed. File: C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Insurance\\code_generated\\generated_code.py. 
Details: Traceback (most recent call last):\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Insurance\\code_generated\\generated_code.py\", line 337, in <module>\n generate_policy_data(customers, agents, products)\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Insurance\\code_generated\\generated_code.py\", line 183, in generate_policy_data\n customer = random.choice(customers)\n ^^^^^^^^^^^^^^^^^^^^^^^^\n File \"C:\\Users\\aakas\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\random.py\", line 373, in choice\n raise IndexError('Cannot choose from an empty sequence')\nIndexError: Cannot choose from an empty sequence\n"
},
{
"p_id": "1",
"org_id": "1775330151",
"org_name": "BIAN",
"schema_prompt": "Generate all IDs as unique UUIDs and follow each field\u2019s data\u2011type, distribution, and dependency rules exactly as defined per table. \nRespect foreign\u2011key order: create PARTY \u2192 ACCOUNTS \u2192 TRANSACTION \u2192 dependent tables, ensuring all references resolve correctly. \nEnforce logical timelines, weighted randomness, cross\u2011table consistency, and validation rules (dates, currencies, statuses, risk logic, fraud patterns).",
"schema_list": [
{
"table_id": "723b962f-a95a-436c-9a82-96d7019f0838",
"table_name": "BIAN_PARTY",
"num_entries": 50000,
"ddl": "CREATE TABLE BIAN_PARTY (\n PARTY_ID STRING PRIMARY KEY,\n FULL_NAME STRING,\n DATE_OF_BIRTH DATE,\n GENDER STRING,\n NATIONALITY STRING,\n SSN STRING,\n EMAIL STRING,\n PHONE_NUMBER STRING,\n ADDRESS_LINE1 STRING,\n ADDRESS_LINE2 STRING,\n CITY STRING,\n STATE STRING,\n POSTAL_CODE STRING,\n COUNTRY STRING,\n CREATED_AT TIMESTAMP\n);",
"instructions": "PARTY_ID \u2192 Generate UUID; must be unique.\nFULL_NAME \u2192 Faker full name.\nDATE_OF_BIRTH \u2192 Random date between 1970-01-01 and 1990-12-31.\nGENDER \u2192 Choose from ['MALE','FEMALE','OTHER'] with ~70/29/1 distribution.\nNATIONALITY \u2192 Choose from ['US','IN','AE','UK','CA'] with ~50/20/10/10/10.\nSSN \u2192 Faker SSN-like string; ensure uniqueness per COUNTRY rules if needed.\nEMAIL \u2192 Faker email; prefer globally unique values.\nPHONE_NUMBER \u2192 Faker phone number (E.164 if you need consistency).\nADDRESS_LINE1 \u2192 Faker street address.\nADDRESS_LINE2 \u2192 Faker secondary address or empty ~40% of time.\nCITY \u2192 Faker city.\nSTATE \u2192 Faker state/region.\nPOSTAL_CODE \u2192 Faker postal code.\nCOUNTRY \u2192 Mirror NATIONALITY distribution ['US','IN','AE','UK','CA'] ~50/20/10/10/10.\nCREATED_AT \u2192 Random timestamp within last 10 years.",
"columns_list": []
},
{
"table_id": "f2a278ea-e4f7-4a2a-a2c3-2be28ea485fc",
"table_name": "BIAN_ACCOUNTS",
"num_entries": 10000,
"ddl": "CREATE TABLE BIAN_ACCOUNTS (\n ACCOUNT_ID STRING PRIMARY KEY,\n PARTY_ID STRING REFERENCES BIAN_PARTY(PARTY_ID),\n ACCOUNT_TYPE STRING,\n ACCOUNT_STATUS STRING,\n OPEN_DATE DATE,\n BALANCE NUMBER(18,2),\n CURRENCY STRING,\n INTEREST_RATE FLOAT,\n BRANCH_ID STRING,\n CREATED_AT TIMESTAMP\n);",
"instructions": "ACCOUNT_ID \u2192 Generate UUID; unique.\nPARTY_ID \u2192 Randomly pick from existing BIAN_PARTY(PARTY_ID).\nACCOUNT_TYPE \u2192 60% \"Checking\", 30% \"Savings\", 10% \"Credit Card\".\nACCOUNT_STATUS \u2192 70% \"Active\", 20% \"Dormant\", 10% \"Closed\".\nOPEN_DATE \u2192 Random date within last 15 years.\nBALANCE \u2192 Positive (100 to 100000) by default; if ACCOUNT_TYPE=\"Credit Card\", ~20% of rows are negative (\u22125000 to \u2212100).\nCURRENCY \u2192 Choose from [\"USD\",\"EUR\",\"GBP\",\"INR\",\"AED\"] weighted by PARTY.COUNTRY if available.\nINTEREST_RATE \u2192 0.1%\u20135% for Checking/Savings, 10%\u201325% for Credit Card.\nBRANCH_ID \u2192 \"BR\" + 3\u20134 digit code (e.g., BR001\u2013BR050).\nCREATED_AT \u2192 Random timestamp on or after OPEN_DATE (spread up to ~10 years), never later than now.",
"columns_list": []
},
{
"table_id": "d5ef286a-77fc-45b1-ae18-2f0c29a61754",
"table_name": "BIAN_TRANSACTION",
"num_entries": 10000,
"ddl": "CREATE TABLE BIAN_TRANSACTION (\n TRANSACTION_ID STRING PRIMARY KEY,\n ACCOUNT_ID STRING REFERENCES BIAN_ACCOUNTS(ACCOUNT_ID),\n TRANSACTION_TYPE STRING,\n AMOUNT NUMBER(18,2),\n CURRENCY STRING,\n TRANSACTION_TIMESTAMP TIMESTAMP,\n MERCHANT_NAME STRING,\n MERCHANT_CATEGORY STRING,\n LOCATION STRING,\n CHANNEL STRING,\n STATUS STRING,\n CREATED_AT TIMESTAMP\n);",
"instructions": "TRANSACTION_ID \u2192 Generate UUID; unique.\nACCOUNT_ID \u2192 Randomly pick from existing BIAN_ACCOUNTS(ACCOUNT_ID).\nTRANSACTION_TYPE \u2192 Choose from [\"DEPOSIT\",\"WITHDRAWAL\",\"PAYMENT\",\"TRANSFER\",\"FEE\",\"REFUND\"] with realistic skew (e.g., DEPOSIT/WITHDRAWAL heavy).\nAMOUNT \u2192 > 0; typical range 1\u20135000 (tail up to 20000 for high-value).\nCURRENCY \u2192 Inherit from the linked account\u2019s CURRENCY when possible; else pick from [\"USD\",\"EUR\",\"GBP\",\"INR\",\"AED\"].\nTRANSACTION_TIMESTAMP \u2192 Faker datetime between 2010-01-01 and now.\nMERCHANT_NAME \u2192 Faker company (blank for non-merchant types like TRANSFER/FEE).\nMERCHANT_CATEGORY \u2192 Pick from [\"GROCERY\",\"RESTAURANT\",\"FUEL\",\"ONLINE\",\"TRAVEL\",\"UTILITY\",\"OTHER\"]; empty for TRANSFER/FEE.\nLOCATION \u2192 City + Country for merchant transactions; else empty.\nCHANNEL \u2192 Choose from [\"POS\",\"ONLINE\",\"MOBILE\",\"ATM\",\"BRANCH\"].\nSTATUS \u2192 92% \"POSTED\", 6% \"PENDING\", 2% \"REVERSED\".\nCREATED_AT \u2192 Timestamp on or after TRANSACTION_TIMESTAMP (within +0\u20133 days).\nBehavioral rules \u2192 ~80% non-fraud normal mix; ~10% \u201cstructuring\u201d sequences: per selected ACCOUNT_ID, emit 3\u20135 DEPOSITs of 1000\u20139999 within 1\u20133 days; ~10% high-velocity bursts: 2\u20134 small PAYMENTs/WITHDRAWALs within 30\u2013120 minutes.",
"columns_list": []
},
{
"table_id": "5794c222-24d4-4ad6-88d7-14b45bc4c19d",
"table_name": "BIAN_KYC_PROFILE",
"num_entries": 10000,
"ddl": "CREATE TABLE BIAN_KYC_PROFILE (\n KYC_ID STRING PRIMARY KEY,\n PARTY_ID STRING REFERENCES BIAN_PARTY(PARTY_ID),\n KYC_STATUS STRING,\n REVIEW_DATE DATE,\n RISK_LEVEL STRING,\n REVIEWER_ID STRING,\n LAST_UPDATED TIMESTAMP\n);",
"instructions": "KYC_ID \u2192 UUID; unique.\nPARTY_ID \u2192 Random from BIAN_PARTY(PARTY_ID).\nKYC_STATUS \u2192 70% \"Verified\", 20% \"Pending\", 10% \"Rejected\".\nREVIEW_DATE \u2192 Random date within last 3 years (bias to last 90 days if \"Pending\").\nRISK_LEVEL \u2192 25% \"High\", 50% \"Medium\", 25% \"Low\" (increase \"High\" odds when KYC_STATUS=\"Rejected\").\nREVIEWER_ID \u2192 From pool EMP1001\u2013EMP1020.\nLAST_UPDATED \u2192 Timestamp on or after REVIEW_DATE (very recent if \"Pending\").",
"columns_list": []
},
{
"table_id": "13775687-07c5-4b0b-9d9d-b5d7e0bc5271",
"table_name": "BIAN_DOCUMENT",
"num_entries": 5000,
"ddl": "CREATE TABLE BIAN_DOCUMENT (\n DOC_ID STRING PRIMARY KEY,\n PARTY_ID STRING REFERENCES BIAN_PARTY(PARTY_ID),\n DOC_TYPE STRING,\n DOC_NUMBER STRING,\n ISSUE_DATE DATE,\n EXPIRY_DATE DATE,\n ISSUING_AUTHORITY STRING,\n DOC_STATUS STRING,\n UPLOADED_AT TIMESTAMP\n);",
"instructions": "DOC_ID \u2192 UUID; unique.\nPARTY_ID \u2192 Random from BIAN_PARTY(PARTY_ID).\nDOC_TYPE \u2192 50% \"Passport\", 30% \"Driver License\", 20% \"National ID\".\nDOC_NUMBER \u2192 Pattern by DOC_TYPE (e.g., Passport: 2 letters + 7 digits; Driver License: alnum 8\u201312; National ID: digits 10\u201312).\nISSUE_DATE \u2192 Random date within last 15 years.\nEXPIRY_DATE \u2192 After ISSUE_DATE: 5\u201310 years for Passport/Driver License, ~10 years for National ID.\nISSUING_AUTHORITY \u2192 From [\"Passport Office\",\"DMV\",\"National ID Authority\"] by DOC_TYPE.\nDOC_STATUS \u2192 85% \"Active\", 15% \"Expired\" (force \"Expired\" if EXPIRY_DATE < today).\nUPLOADED_AT \u2192 Timestamp within \u00b17 days of ISSUE_DATE.",
"columns_list": []
},
{
"table_id": "8908102b-54fa-4c4f-8e4a-12e511ec2cd0",
"table_name": "BIAN_RISK_ASSESSMENT",
"num_entries": 10000,
"ddl": "CREATE TABLE BIAN_RISK_ASSESSMENT (\n RISK_ID STRING PRIMARY KEY,\n PARTY_ID STRING REFERENCES BIAN_PARTY(PARTY_ID),\n RISK_MODEL_NAME STRING,\n RISK_SCORE FLOAT,\n RISK_CATEGORY STRING,\n REVIEWED_BY STRING,\n REVIEW_DATE DATE,\n RECOMMENDATION STRING,\n CREATED_AT TIMESTAMP\n);",
"instructions": "RISK_ID \u2192 UUID; unique.\nPARTY_ID \u2192 Random from BIAN_PARTY(PARTY_ID).\nRISK_MODEL_NAME \u2192 Choose from [\"MODEL_A\",\"MODEL_B\",\"MODEL_C\"].\nRISK_SCORE \u2192 Float 0\u2013100 with 20% > 80 (high-risk).\nRISK_CATEGORY \u2192 Derive: \"High\" if >80, \"Medium\" if 40\u201380, \"Low\" if <40.\nREVIEWED_BY \u2192 EMP2001\u2013EMP2020.\nREVIEW_DATE \u2192 Date within last 2 years.\nRECOMMENDATION \u2192 From [\"Enhanced Due Diligence\",\"Monitor\",\"Proceed\"] based on RISK_CATEGORY.\nCREATED_AT \u2192 Timestamp on or after REVIEW_DATE.",
"columns_list": []
},
{
"table_id": "964b37a1-65f4-457d-ad05-4706754800cb",
"table_name": "BIAN_DEVICE_ACCESS",
"num_entries": 50000,
"ddl": "CREATE TABLE BIAN_DEVICE_ACCESS (\n DEVICE_ID STRING PRIMARY KEY,\n PARTY_ID STRING REFERENCES BIAN_PARTY(PARTY_ID),\n DEVICE_TYPE STRING,\n DEVICE_OS STRING,\n IP_ADDRESS STRING,\n LOCATION STRING,\n LOGIN_TIME TIMESTAMP,\n LOGOUT_TIME TIMESTAMP,\n AUTH_METHOD STRING,\n SESSION_STATUS STRING\n);",
"instructions": "DEVICE_ID \u2192 UUID; unique.\nPARTY_ID \u2192 Random from BIAN_PARTY(PARTY_ID).\nDEVICE_TYPE \u2192 60% \"Mobile\", 30% \"Desktop\", 10% \"Tablet\".\nDEVICE_OS \u2192 By DEVICE_TYPE: Mobile \u2192 [\"iOS\",\"Android\"]; Desktop \u2192 [\"Windows\",\"macOS\",\"Linux\"]; Tablet \u2192 [\"iPadOS\",\"Android\"].\nIP_ADDRESS \u2192 Valid IPv4 or IPv6.\nLOCATION \u2192 City + Country.\nLOGIN_TIME \u2192 Timestamp within last 6 months.\nLOGOUT_TIME \u2192 After LOGIN_TIME (0.1\u20138 hours later); 5% sessions may omit LOGOUT_TIME to simulate active sessions.\nAUTH_METHOD \u2192 15% \"Biometric\", 50% \"Password\", 35% \"Two-Factor\".\nSESSION_STATUS \u2192 \"Active\" if no LOGOUT_TIME else \"Closed\".",
"columns_list": []
},
{
"table_id": "1db89873-2ac7-4480-a1cb-be4c598ba204",
"table_name": "BIAN_FRAUD_ALERT",
"num_entries": 17000,
"ddl": "CREATE TABLE BIAN_FRAUD_ALERT (\n ALERT_ID STRING PRIMARY KEY,\n TRANSACTION_ID STRING REFERENCES BIAN_TRANSACTION(TRANSACTION_ID),\n PARTY_ID STRING REFERENCES BIAN_PARTY(PARTY_ID),\n ALERT_TYPE STRING,\n TRIGGERED_AT TIMESTAMP,\n RESOLUTION_STATUS STRING,\n RESOLVED_BY STRING\n);",
"instructions": "ALERT_ID \u2192 UUID; unique.\nTRANSACTION_ID \u2192 Random from BIAN_TRANSACTION(TRANSACTION_ID).\nPARTY_ID \u2192 Random from BIAN_PARTY(PARTY_ID) (ideally matching the party behind the linked account).\nALERT_TYPE \u2192 Choose from [\"Unusual Location\",\"Multiple Failed Logins\",\"High-Value Transaction\",\"Suspicious IP\"].\nTRIGGERED_AT \u2192 Timestamp within last 1 year.\nRESOLUTION_STATUS \u2192 60% \"Open\", 30% \"Resolved\", 10% \"Escalated\".\nRESOLVED_BY \u2192 If RESOLUTION_STATUS in (\"Resolved\",\"Escalated\"): EMP3001\u2013EMP3050 else NULL.",
"columns_list": []
}
],
"schema_gen_status": "DONE",
"schema_gen_last_update": "2026-04-08 17:22:07",
"schema_gen_log": "Files saved locally.",
"dg_code_gen_status": "DONE",
"dg_code_gen_at": "2026-04-08 17:18:19",
"dg_code_gen_log": "Code generated successfully.",
"dg_bulkdata_gen_status": "DONE",
"dg_bulkdata_gen_at": "2026-04-08 17:22:05",
"dg_bulkdata_gen_log": "CSV generation completed.",
"dg_sf_upload_status": "DONE",
"dg_sf_upload_at": "2026-04-08 17:22:07",
"dg_sf_upload_log": "Validation for schema 'BIAN': 32/32 checks passed.",
"last_run_log_path": "C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\BIAN\\logs\\latest.log",
"last_error_trace": "Generated data script failed. File: C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\BIAN\\code_generated\\generated_code.py. Details: sers\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\BIAN\\code_generated\\generated_code.py\", line 280, in <module>\n main()\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\BIAN\\code_generated\\generated_code.py\", line 261, in main\n transactions = generate_transactions_data(accounts, 10000)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\BIAN\\code_generated\\generated_code.py\", line 100, in generate_transactions_data\n transaction_timestamp = fake.date_time_between(start_date='2010-01-01', end_date='now')\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\.venv\\Lib\\site-packages\\faker\\providers\\date_time\\__init__.py\", line 2103, in date_time_between\n start_date = self._parse_date_time(start_date, tzinfo=tzinfo)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\.venv\\Lib\\site-packages\\faker\\providers\\date_time\\__init__.py\", line 2063, in _parse_date_time\n time_params = cls._parse_date_string(value)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\.venv\\Lib\\site-packages\\faker\\providers\\date_time\\__init__.py\", line 2039, in _parse_date_string\n raise ParseError(f\"Can't parse date string `{value}`\")\nfaker.providers.date_time.ParseError: Can't parse date string `2010-01-01`\nTraceback (most recent call last):\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\data_generation_backend.py\", line 994, in generate_schema_data\n run = subprocess.run(\n ^^^^^^^^^^^^^^^\n File \"C:\\Users\\aakas\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py\", line 571, in run\n raise 
CalledProcessError(retcode, process.args,\nsubprocess.CalledProcessError: Command '['C:\\\\Users\\\\aakas\\\\Desktop\\\\DLS\\\\Datagenx\\\\datagenx\\\\.venv\\\\Scripts\\\\python.exe', 'C:\\\\Users\\\\aakas\\\\Desktop\\\\DLS\\\\Datagenx\\\\datagenx\\\\schemas\\\\BIAN\\\\code_generated\\\\generated_code.py']' returned non-zero exit status 1.\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\data_generation_backend.py\", line 1024, in generate_schema_data\n raise RuntimeError(\nRuntimeError: Generated data script failed. File: C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\BIAN\\code_generated\\generated_code.py. Details: sers\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\BIAN\\code_generated\\generated_code.py\", line 280, in <module>\n main()\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\BIAN\\code_generated\\generated_code.py\", line 261, in main\n transactions = generate_transactions_data(accounts, 10000)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\BIAN\\code_generated\\generated_code.py\", line 100, in generate_transactions_data\n transaction_timestamp = fake.date_time_between(start_date='2010-01-01', end_date='now')\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\.venv\\Lib\\site-packages\\faker\\providers\\date_time\\__init__.py\", line 2103, in date_time_between\n start_date = self._parse_date_time(start_date, tzinfo=tzinfo)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\.venv\\Lib\\site-packages\\faker\\providers\\date_time\\__init__.py\", line 2063, in _parse_date_time\n time_params = cls._parse_date_string(value)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File 
\"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\.venv\\Lib\\site-packages\\faker\\providers\\date_time\\__init__.py\", line 2039, in _parse_date_string\n raise ParseError(f\"Can't parse date string `{value}`\")\nfaker.providers.date_time.ParseError: Can't parse date string `2010-01-01`\n"
},
{
"p_id": "1",
"org_id": "1776111744",
"org_name": "Retail",
"schema_prompt": "Generate realistic synthetic retail data for a US-based omnichannel retailer. Preserve referential integrity across all tables, keep dates logically consistent with no future dates, and generate varied but coherent customer and order behavior suitable for testing analytics and downstream pipelines.",
"schema_list": [
{
"table_id": "16c964f7-b502-44f8-b4d9-0acf17dd3b54",
"table_name": "RETAIL_CUSTOMERS",
"num_entries": 2000,
"ddl": "CREATE TABLE RETAIL_CUSTOMERS (\n CUSTOMER_ID BIGINT PRIMARY KEY,\n FIRST_NAME VARCHAR(50) NOT NULL,\n LAST_NAME VARCHAR(50) NOT NULL,\n EMAIL VARCHAR(120) UNIQUE NOT NULL,\n PHONE VARCHAR(20),\n CUSTOMER_SEGMENT VARCHAR(20) NOT NULL,\n LOYALTY_TIER VARCHAR(20),\n CITY VARCHAR(60),\n STATE_CODE CHAR(2),\n COUNTRY_CODE CHAR(2) NOT NULL,\n CREATED_AT TIMESTAMP NOT NULL,\n UPDATED_AT TIMESTAMP\n);",
"instructions": "Generate realistic US retail customers with unique emails and plausible phone numbers. CUSTOMER_SEGMENT should be one of {NEW,REGULAR,VIP} with a reasonable business distribution. LOYALTY_TIER should be one of {BRONZE,SILVER,GOLD,PLATINUM} and may be null for some NEW customers. COUNTRY_CODE should be mostly 'US'. CREATED_AT must be in the past and UPDATED_AT must be on or after CREATED_AT when present.",
"columns_list": []
},
{
"table_id": "0fbabd3a-ee3a-4921-b2b9-6070daa73f5a",
"table_name": "RETAIL_ORDERS",
"num_entries": 5000,
"ddl": "CREATE TABLE RETAIL_ORDERS (\n ORDER_ID BIGINT PRIMARY KEY,\n CUSTOMER_ID BIGINT NOT NULL,\n ORDER_DATE TIMESTAMP NOT NULL,\n ORDER_STATUS VARCHAR(20) NOT NULL,\n SALES_CHANNEL VARCHAR(20) NOT NULL,\n PAYMENT_METHOD VARCHAR(20) NOT NULL,\n CURRENCY_CODE CHAR(3) NOT NULL,\n SUBTOTAL_AMOUNT DECIMAL(12,2) NOT NULL,\n TAX_AMOUNT DECIMAL(12,2) NOT NULL,\n SHIPPING_AMOUNT DECIMAL(12,2) NOT NULL,\n TOTAL_AMOUNT DECIMAL(12,2) NOT NULL,\n SHIPPING_CITY VARCHAR(60),\n SHIPPING_STATE_CODE CHAR(2),\n CREATED_AT TIMESTAMP NOT NULL,\n FOREIGN KEY (CUSTOMER_ID) REFERENCES RETAIL_CUSTOMERS(CUSTOMER_ID)\n);",
"instructions": "Every CUSTOMER_ID must exist in RETAIL_CUSTOMERS. ORDER_STATUS should be one of {PLACED,SHIPPED,DELIVERED,CANCELLED,RETURNED} with DELIVERED being most common. SALES_CHANNEL should be one of {ONLINE,STORE,MOBILE}. PAYMENT_METHOD should be one of {CARD,CASH,WALLET,GIFTCARD}. CURRENCY_CODE should be 'USD' for almost all rows. Monetary values must be positive where appropriate and TOTAL_AMOUNT must equal SUBTOTAL_AMOUNT + TAX_AMOUNT + SHIPPING_AMOUNT. ORDER_DATE and CREATED_AT must be in the past, and CREATED_AT should be on or after ORDER_DATE.",
"columns_list": []
}
],
"schema_gen_status": "DONE",
"schema_gen_last_update": "2026-04-14 16:37:40",
"schema_gen_log": "Files saved locally.",
"dg_code_gen_status": "DONE",
"dg_code_gen_at": "2026-04-14 16:37:36",
"dg_code_gen_log": "Code generated successfully.",
"dg_bulkdata_gen_status": "DONE",
"dg_bulkdata_gen_at": "2026-04-14 16:37:40",
"dg_bulkdata_gen_log": "CSV generation completed.",
"dg_sf_upload_status": "DONE",
"dg_sf_upload_at": "2026-04-14 16:37:40",
"dg_sf_upload_log": "Validation for schema 'Retail': 8/8 checks passed.",
"last_run_log_path": "C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Retail\\logs\\latest.log"
},
{
"p_id": "1",
"org_id": "1776203156",
"org_name": "Healthcare_200_columns",
"schema_prompt": "Generate realistic synthetic US healthcare data for one standalone table. Keep all values consistent with the DDL, avoid future dates unless explicitly allowed, and make the dataset suitable for testing and analytics. For any column that is not explicitly described in the table instructions, honor the DDL data type, nullability, and reasonable business meaning of the column name so the generated file does not fail due to type or format mismatches.",
"schema_list": [
{
"table_id": "28adb30f-a8fc-4e19-ba39-b37b6d7f5fdb",
"table_name": "PATIENT_ENCOUNTERS",
"num_entries": 100,
"ddl": "CREATE TABLE PATIENT_ENCOUNTERS (\n ENCOUNTER_ID BIGINT PRIMARY KEY,\n PATIENT_ID BIGINT NOT NULL,\n MRN VARCHAR(20) UNIQUE NOT NULL,\n FIRST_NAME VARCHAR(50) NOT NULL,\n MIDDLE_NAME VARCHAR(50),\n LAST_NAME VARCHAR(50) NOT NULL,\n DATE_OF_BIRTH DATE NOT NULL,\n GENDER VARCHAR(20) NOT NULL,\n PHONE_HOME VARCHAR(20),\n PHONE_MOBILE VARCHAR(20),\n EMAIL VARCHAR(120),\n ADDRESS_LINE1 VARCHAR(120),\n ADDRESS_LINE2 VARCHAR(120),\n CITY VARCHAR(60),\n STATE_CODE CHAR(2),\n ZIP_CODE VARCHAR(10),\n COUNTRY_CODE CHAR(2),\n MARITAL_STATUS VARCHAR(20),\n LANGUAGE_CODE VARCHAR(10),\n RACE VARCHAR(30),\n ETHNICITY VARCHAR(30),\n PATIENT_STATUS VARCHAR(20),\n PRIMARY_CARE_PROVIDER VARCHAR(100),\n GUARANTOR_NAME VARCHAR(100),\n EMERGENCY_CONTACT_NAME VARCHAR(100),\n EMERGENCY_CONTACT_PHONE VARCHAR(20),\n EMPLOYER_NAME VARCHAR(100),\n OCCUPATION VARCHAR(80),\n HEIGHT_CM DECIMAL(6,2),\n WEIGHT_KG DECIMAL(6,2),\n BMI DECIMAL(5,2),\n BLOOD_TYPE VARCHAR(5),\n ALLERGY_SUMMARY VARCHAR(255),\n SMOKING_STATUS VARCHAR(20),\n ALCOHOL_USE VARCHAR(20),\n PREGNANCY_STATUS VARCHAR(20),\n CHRONIC_CONDITION_FLAG BOOLEAN,\n DIABETES_FLAG BOOLEAN,\n HYPERTENSION_FLAG BOOLEAN,\n ASTHMA_FLAG BOOLEAN,\n HEART_DISEASE_FLAG BOOLEAN,\n ENCOUNTER_DATE TIMESTAMP NOT NULL,\n ADMISSION_DATE TIMESTAMP,\n DISCHARGE_DATE TIMESTAMP,\n ENCOUNTER_TYPE VARCHAR(30),\n VISIT_REASON VARCHAR(255),\n CHIEF_COMPLAINT VARCHAR(255),\n DEPARTMENT VARCHAR(50) NOT NULL,\n SUB_DEPARTMENT VARCHAR(50),\n FACILITY_NAME VARCHAR(100),\n FACILITY_ID VARCHAR(30),\n ROOM_NUMBER VARCHAR(20),\n BED_NUMBER VARCHAR(20),\n PROVIDER_ID VARCHAR(30),\n PROVIDER_NAME VARCHAR(100),\n ATTENDING_PHYSICIAN VARCHAR(100),\n REFERRING_PHYSICIAN VARCHAR(100),\n NURSE_NAME VARCHAR(100),\n TRIAGE_LEVEL VARCHAR(20),\n APPOINTMENT_ID VARCHAR(30),\n APPOINTMENT_STATUS VARCHAR(20),\n CHECKIN_TIME TIMESTAMP,\n CHECKOUT_TIME TIMESTAMP,\n WAIT_TIME_MINUTES INT,\n LENGTH_OF_STAY_HOURS DECIMAL(8,2),\n IS_FOLLOW_UP BOOLEAN NOT NULL,\n READMISSION_FLAG 
BOOLEAN,\n TELEHEALTH_FLAG BOOLEAN,\n NO_SHOW_FLAG BOOLEAN,\n DISCHARGE_DISPOSITION VARCHAR(50),\n DIAGNOSIS_CODE_1 VARCHAR(10),\n DIAGNOSIS_DESC_1 VARCHAR(255),\n DIAGNOSIS_CODE_2 VARCHAR(10),\n DIAGNOSIS_DESC_2 VARCHAR(255),\n DIAGNOSIS_CODE_3 VARCHAR(10),\n DIAGNOSIS_DESC_3 VARCHAR(255),\n PROCEDURE_CODE_1 VARCHAR(15),\n PROCEDURE_DESC_1 VARCHAR(255),\n PROCEDURE_CODE_2 VARCHAR(15),\n PROCEDURE_DESC_2 VARCHAR(255),\n PROCEDURE_CODE_3 VARCHAR(15),\n PROCEDURE_DESC_3 VARCHAR(255),\n MEDICATION_1 VARCHAR(100),\n MEDICATION_1_DOSE VARCHAR(50),\n MEDICATION_1_ROUTE VARCHAR(30),\n MEDICATION_2 VARCHAR(100),\n MEDICATION_2_DOSE VARCHAR(50),\n MEDICATION_2_ROUTE VARCHAR(30),\n MEDICATION_3 VARCHAR(100),\n MEDICATION_3_DOSE VARCHAR(50),\n MEDICATION_3_ROUTE VARCHAR(30),\n LAB_ORDER_ID VARCHAR(30),\n LAB_PANEL_NAME VARCHAR(100),\n LAB_RESULT_STATUS VARCHAR(20),\n HEMOGLOBIN_VALUE DECIMAL(6,2),\n WBC_COUNT DECIMAL(10,2),\n PLATELET_COUNT DECIMAL(10,2),\n GLUCOSE_MG_DL DECIMAL(8,2),\n CREATININE_MG_DL DECIMAL(8,2),\n SODIUM_MMOL_L DECIMAL(8,2),\n POTASSIUM_MMOL_L DECIMAL(8,2),\n CHLORIDE_MMOL_L DECIMAL(8,2),\n BUN_MG_DL DECIMAL(8,2),\n ALT_U_L DECIMAL(8,2),\n AST_U_L DECIMAL(8,2),\n TOTAL_CHOLESTEROL_MG_DL DECIMAL(8,2),\n HDL_MG_DL DECIMAL(8,2),\n LDL_MG_DL DECIMAL(8,2),\n TRIGLYCERIDES_MG_DL DECIMAL(8,2),\n A1C_PERCENT DECIMAL(5,2),\n COVID_TEST_RESULT VARCHAR(20),\n FLU_TEST_RESULT VARCHAR(20),\n PULSE INT,\n RESPIRATORY_RATE INT,\n TEMPERATURE_C DECIMAL(5,2),\n OXYGEN_SATURATION DECIMAL(5,2),\n SYSTOLIC_BP INT,\n DIASTOLIC_BP INT,\n PAIN_SCORE INT,\n FALL_RISK_SCORE INT,\n BILL_AMOUNT DECIMAL(12,2),\n ALLOWED_AMOUNT DECIMAL(12,2),\n PAID_AMOUNT DECIMAL(12,2),\n PATIENT_RESPONSIBILITY DECIMAL(12,2),\n COPAY_AMOUNT DECIMAL(12,2),\n COINSURANCE_AMOUNT DECIMAL(12,2),\n DEDUCTIBLE_AMOUNT DECIMAL(12,2),\n ADJUSTMENT_AMOUNT DECIMAL(12,2),\n WRITE_OFF_AMOUNT DECIMAL(12,2),\n OUTSTANDING_BALANCE DECIMAL(12,2),\n PAYMENT_PLAN_FLAG BOOLEAN,\n PAYMENT_STATUS VARCHAR(20),\n CLAIM_ID 
VARCHAR(30),\n CLAIM_STATUS VARCHAR(20),\n CLAIM_SUBMITTED_DATE DATE,\n CLAIM_PAID_DATE DATE,\n CLAIM_DENIAL_REASON VARCHAR(100),\n REVENUE_CODE VARCHAR(10),\n DRG_CODE VARCHAR(10),\n CPT_HCPCS_CODE VARCHAR(15),\n ICD_VERSION VARCHAR(10),\n INSURANCE_PLAN VARCHAR(50),\n PAYER_NAME VARCHAR(100),\n PAYER_TYPE VARCHAR(30),\n POLICY_NUMBER VARCHAR(40),\n GROUP_NUMBER VARCHAR(40),\n SUBSCRIBER_ID VARCHAR(40),\n SUBSCRIBER_RELATIONSHIP VARCHAR(30),\n PRIOR_AUTH_NUMBER VARCHAR(40),\n COVERAGE_EFFECTIVE_DATE DATE,\n COVERAGE_END_DATE DATE,\n NETWORK_STATUS VARCHAR(20),\n REFERRAL_REQUIRED_FLAG BOOLEAN,\n CASE_MANAGER_NAME VARCHAR(100),\n CARE_GAP_FLAG BOOLEAN,\n RISK_SCORE DECIMAL(6,2),\n SEVERITY_SCORE DECIMAL(6,2),\n QUALITY_SCORE DECIMAL(6,2),\n SATISFACTION_SCORE DECIMAL(6,2),\n TRANSPORT_MODE VARCHAR(30),\n ARRIVAL_SOURCE VARCHAR(30),\n DISCHARGE_INSTRUCTIONS VARCHAR(255),\n FOLLOW_UP_DUE_DATE DATE,\n NEXT_APPOINTMENT_DATE DATE,\n PORTAL_ENROLLED_FLAG BOOLEAN,\n SMS_CONSENT_FLAG BOOLEAN,\n EMAIL_CONSENT_FLAG BOOLEAN,\n DATA_SOURCE_SYSTEM VARCHAR(50),\n IMPORT_BATCH_ID VARCHAR(40),\n RECORD_VERSION INT,\n IS_ACTIVE BOOLEAN,\n IS_DELETED BOOLEAN,\n CREATED_AT TIMESTAMP NOT NULL,\n UPDATED_AT TIMESTAMP,\n CREATED_BY VARCHAR(50),\n UPDATED_BY VARCHAR(50),\n CUSTOM_ATTR_001 VARCHAR(100),\n CUSTOM_ATTR_002 VARCHAR(100),\n CUSTOM_ATTR_003 VARCHAR(100),\n CUSTOM_ATTR_004 VARCHAR(100),\n CUSTOM_ATTR_005 VARCHAR(100),\n CUSTOM_ATTR_006 VARCHAR(100),\n CUSTOM_ATTR_007 VARCHAR(100),\n CUSTOM_ATTR_008 VARCHAR(100),\n CUSTOM_ATTR_009 VARCHAR(100),\n CUSTOM_ATTR_010 VARCHAR(100),\n CUSTOM_NUM_001 DECIMAL(12,2),\n CUSTOM_NUM_002 DECIMAL(12,2),\n CUSTOM_NUM_003 DECIMAL(12,2),\n CUSTOM_NUM_004 DECIMAL(12,2),\n CUSTOM_NUM_005 DECIMAL(12,2),\n CUSTOM_DATE_001 DATE,\n CUSTOM_DATE_002 DATE,\n CUSTOM_DATE_003 DATE,\n CUSTOM_FLAG_001 BOOLEAN,\n CUSTOM_FLAG_002 BOOLEAN,\n CUSTOM_FLAG_003 BOOLEAN,\n CUSTOM_FLAG_004 BOOLEAN,\n NOTES VARCHAR(500),\n RECORD_HASH VARCHAR(128)\n);",
"instructions": "Generate 200 realistic US healthcare encounter records for this single 200-column table. Do not leave missing values, blank strings, nulls, or empty fields in the output. Populate every column in every row with a sensible value that matches the DDL data type and the business meaning of the column. Give explicit attention to these 10 columns: 1. PATIENT_ID should be a positive numeric identifier and unique in this table. 2. MRN should be unique and follow a simple pattern like MRN plus 8 digits. 3. DATE_OF_BIRTH should be a valid past date and produce ages roughly between 0 and 95. 4. GENDER should use values from {MALE,FEMALE,OTHER}. 5. PHONE_MOBILE and PHONE_HOME must use only one US phone number format for all rows: +1-XXX-XXX-XXXX. Do not mix formats. 6. EMAIL should be valid and populated for every row. 7. ENCOUNTER_DATE should be in the past, typically within the last 2 years, and must be after DATE_OF_BIRTH. 8. DEPARTMENT should be one of {CARDIOLOGY,ORTHOPEDICS,PEDIATRICS,ONCOLOGY,EMERGENCY,GENERAL_MEDICINE}. 9. DIAGNOSIS_CODE_1 should look like a realistic ICD-style diagnosis code. 10. BILL_AMOUNT should be a positive decimal amount with 2 digits after the decimal, typically between 50.00 and 25000.00. For all remaining columns not explicitly described above, honor the DDL data types, lengths, and sensible healthcare meaning of the column names, and always generate a populated value so the output does not contain missing data or datatype-related errors.",
"columns_list": []
},
{
"table_id": "52a6ca98-9f4b-4567-938d-4d71d34523d3",
"table_name": "ENCOUNTER_DIAGNOSES",
"num_entries": 50,
"ddl": "CREATE TABLE ENCOUNTER_DIAGNOSES (\n ENCOUNTER_ID BIGINT NOT NULL,\n DIAGNOSIS_SEQUENCE INT NOT NULL,\n DIAGNOSIS_CODE VARCHAR(10) NOT NULL,\n DIAGNOSIS_DESCRIPTION VARCHAR(255) NOT NULL,\n DIAGNOSIS_TYPE VARCHAR(20) NOT NULL,\n PRESENT_ON_ADMISSION_FLAG BOOLEAN NOT NULL,\n CHRONIC_FLAG BOOLEAN NOT NULL,\n DIAGNOSIS_DATE DATE NOT NULL,\n CODING_SYSTEM VARCHAR(10) NOT NULL,\n CREATED_AT TIMESTAMP NOT NULL,\n PRIMARY KEY (ENCOUNTER_ID, DIAGNOSIS_SEQUENCE),\n FOREIGN KEY (ENCOUNTER_ID) REFERENCES PATIENT_ENCOUNTERS(ENCOUNTER_ID)\n);",
"instructions": "Generate 400 healthcare diagnosis records linked to PATIENT_ENCOUNTERS. Every column should follow these exact rules: 1. ENCOUNTER_ID must always reference an existing PATIENT_ENCOUNTERS.ENCOUNTER_ID value. 2. DIAGNOSIS_SEQUENCE must be a positive integer such as 1, 2, or 3, and the composite key (ENCOUNTER_ID, DIAGNOSIS_SEQUENCE) must be unique for every row. 3. DIAGNOSIS_CODE must be populated for every row and look like a realistic ICD-style diagnosis code. 4. DIAGNOSIS_DESCRIPTION must be populated for every row and should match the diagnosis code in a believable way. 5. DIAGNOSIS_TYPE must be one of {PRIMARY,SECONDARY,ADMITTING,FINAL}. 6. PRESENT_ON_ADMISSION_FLAG must be either true or false and should look realistic relative to the diagnosis type. 7. CHRONIC_FLAG must be either true or false and should be true for a realistic minority of rows. 8. DIAGNOSIS_DATE must be a valid past date and should be on or before the linked encounter date. 9. CODING_SYSTEM must be populated for every row and should be one of {ICD10,ICD9}. 10. CREATED_AT must be a valid timestamp in the past and should be on or after DIAGNOSIS_DATE.",
"columns_list": []
}
],
"schema_gen_status": "DONE",
"schema_gen_last_update": "2026-04-15 15:58:54",
"schema_gen_log": "Files saved locally.",
"dg_code_gen_status": "DONE",
"dg_code_gen_at": "2026-04-15 15:58:53",
"dg_code_gen_log": "Code generated successfully.",
"dg_bulkdata_gen_status": "DONE",
"dg_bulkdata_gen_at": "2026-04-15 15:58:54",
"dg_bulkdata_gen_log": "CSV generation completed.",
"dg_sf_upload_status": "DONE",
"dg_sf_upload_at": "2026-04-15 15:58:54",
"dg_sf_upload_log": "Validation for schema 'Healthcare_200_columns': 8/12 checks passed. Relationships=1, distributions=0, conditionals=0, profiles=0.",
"last_run_log_path": "C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Healthcare_200_columns\\logs\\latest.log"
},
{
"p_id": "1",
"org_id": "1776288680",
"org_name": "Healthcare_10_Table_Scale",
"schema_prompt": "Generate large-scale US healthcare data for a 10-table schema. Maintain referential integrity across patient, provider, facility, appointment, encounter, diagnosis, procedure, medication, lab, and claim tables. Use realistic healthcare distributions, avoid impossible dates, and honor DDL datatypes and nullability. This schema is intended to stress test scale with row counts ranging from 150k to 1M while also including one wide 200-column encounter table.",
"schema_list": [
{
"table_id": "88e5fe4b-67f9-4323-af9f-cc3835eaf620",
"table_name": "PATIENT_MASTER",
"num_entries": 1000000,
"ddl": "CREATE TABLE PATIENT_MASTER (\n PATIENT_ID BIGINT PRIMARY KEY,\n MRN VARCHAR(20) UNIQUE NOT NULL,\n FIRST_NAME VARCHAR(50) NOT NULL,\n MIDDLE_NAME VARCHAR(50),\n LAST_NAME VARCHAR(50) NOT NULL,\n DATE_OF_BIRTH DATE NOT NULL,\n GENDER VARCHAR(20) NOT NULL,\n PHONE_HOME VARCHAR(20),\n PHONE_MOBILE VARCHAR(20),\n EMAIL VARCHAR(120),\n ADDRESS_LINE1 VARCHAR(120),\n ADDRESS_LINE2 VARCHAR(120),\n CITY VARCHAR(60),\n STATE_CODE CHAR(2),\n ZIP_CODE VARCHAR(10),\n COUNTRY_CODE CHAR(2),\n MARITAL_STATUS VARCHAR(20),\n LANGUAGE_CODE VARCHAR(10),\n RACE VARCHAR(30),\n ETHNICITY VARCHAR(30),\n DECEASED_FLAG BOOLEAN,\n DECEASED_DATE DATE,\n PRIMARY_CARE_PROVIDER_ID VARCHAR(30),\n PORTAL_STATUS VARCHAR(20),\n CREATED_AT TIMESTAMP NOT NULL,\n UPDATED_AT TIMESTAMP,\n FOREIGN KEY (PRIMARY_CARE_PROVIDER_ID) REFERENCES PROVIDERS(PROVIDER_ID)\n);",
"instructions": "Generate realistic US patient master data. MRN must be unique in pattern MRN plus 8 digits. GENDER in {MALE,FEMALE,OTHER} with 48/50/2 split. PORTAL_STATUS in {ACTIVE,INACTIVE,PENDING} with 55/30/15 split. DATE_OF_BIRTH must be in the past and produce ages 0-95. PHONE_HOME and PHONE_MOBILE should use +1-XXX-XXX-XXXX format. EMAIL should be valid for most adult patients. PRIMARY_CARE_PROVIDER_ID must reference PROVIDERS when populated.",
"columns_list": []
},
{
"table_id": "9f63aa56-c040-4f9a-b1e7-fc8e3a99df35",
"table_name": "PROVIDERS",
"num_entries": 150000,
"ddl": "CREATE TABLE PROVIDERS (\n PROVIDER_ID VARCHAR(30) PRIMARY KEY,\n NPI VARCHAR(20) UNIQUE NOT NULL,\n PROVIDER_FIRST_NAME VARCHAR(50) NOT NULL,\n PROVIDER_LAST_NAME VARCHAR(50) NOT NULL,\n CREDENTIALS VARCHAR(20),\n SPECIALTY VARCHAR(50) NOT NULL,\n SUBSPECIALTY VARCHAR(50),\n DEPARTMENT VARCHAR(50),\n PROVIDER_TYPE VARCHAR(30) NOT NULL,\n PHONE_NUMBER VARCHAR(20),\n EMAIL VARCHAR(120),\n FACILITY_ID VARCHAR(30),\n LICENSE_STATE CHAR(2),\n LICENSE_NUMBER VARCHAR(30),\n DEA_NUMBER VARCHAR(20),\n BOARD_CERTIFIED_FLAG BOOLEAN,\n EMPLOYMENT_STATUS VARCHAR(20),\n GENDER VARCHAR(20),\n LANGUAGE_CODE VARCHAR(10),\n YEARS_EXPERIENCE INT,\n HIRE_DATE DATE,\n TERMINATION_DATE DATE,\n CREATED_AT TIMESTAMP NOT NULL,\n UPDATED_AT TIMESTAMP,\n FOREIGN KEY (FACILITY_ID) REFERENCES FACILITIES(FACILITY_ID)\n);",
"instructions": "Generate realistic provider master data. PROVIDER_ID and NPI must be unique. SPECIALTY in {FAMILY_MEDICINE,INTERNAL_MEDICINE,CARDIOLOGY,ORTHOPEDICS,PEDIATRICS,ONCOLOGY,EMERGENCY_MEDICINE} with 18/20/12/12/12/8/18 split. PROVIDER_TYPE in {PHYSICIAN,NURSE_PRACTITIONER,PHYSICIAN_ASSISTANT,NURSE} with 55/15/10/20 split. EMPLOYMENT_STATUS in {ACTIVE,LEAVE,TERMINATED} with 92/4/4 split. YEARS_EXPERIENCE between 0 and 40.",
"columns_list": []
},
{
"table_id": "cfff7626-208d-485b-b8e3-0a4bb870f674",
"table_name": "FACILITIES",
"num_entries": 180000,
"ddl": "CREATE TABLE FACILITIES (\n FACILITY_ID VARCHAR(30) PRIMARY KEY,\n FACILITY_NAME VARCHAR(120) NOT NULL,\n FACILITY_TYPE VARCHAR(30) NOT NULL,\n TAXONOMY_CODE VARCHAR(20),\n PHONE_NUMBER VARCHAR(20),\n ADDRESS_LINE1 VARCHAR(120),\n ADDRESS_LINE2 VARCHAR(120),\n CITY VARCHAR(60),\n STATE_CODE CHAR(2),\n ZIP_CODE VARCHAR(10),\n COUNTRY_CODE CHAR(2),\n REGION VARCHAR(30),\n BED_COUNT INT,\n TRAUMA_LEVEL VARCHAR(20),\n TEACHING_FLAG BOOLEAN,\n OWNERSHIP_TYPE VARCHAR(30),\n CMS_CERTIFIED_FLAG BOOLEAN,\n NETWORK_STATUS VARCHAR(20),\n OPEN_DATE DATE,\n CLOSE_DATE DATE,\n LATITUDE DECIMAL(10,6),\n LONGITUDE DECIMAL(10,6),\n CREATED_AT TIMESTAMP NOT NULL,\n UPDATED_AT TIMESTAMP\n);",
"instructions": "Generate realistic healthcare facility data. FACILITY_TYPE in {HOSPITAL,CLINIC,URGENT_CARE,LAB,IMAGING_CENTER,SURGERY_CENTER} with 25/30/12/12/11/10 split. STATE_CODE should be valid US state abbreviations. PHONE_NUMBER should use +1-XXX-XXX-XXXX format. BED_COUNT should be positive for facility types that support beds and near zero for labs and imaging centers. NETWORK_STATUS in {IN_NETWORK,OUT_OF_NETWORK,MIXED} with 70/20/10 split.",
"columns_list": []
},
{
"table_id": "7f75bccf-5137-45f4-bbbb-7ec187208072",
"table_name": "APPOINTMENTS",
"num_entries": 300000,
"ddl": "CREATE TABLE APPOINTMENTS (\n APPOINTMENT_ID VARCHAR(30) PRIMARY KEY,\n PATIENT_ID BIGINT NOT NULL,\n PROVIDER_ID VARCHAR(30) NOT NULL,\n FACILITY_ID VARCHAR(30) NOT NULL,\n APPOINTMENT_DATE DATE NOT NULL,\n APPOINTMENT_TS TIMESTAMP NOT NULL,\n APPOINTMENT_TYPE VARCHAR(30) NOT NULL,\n APPOINTMENT_STATUS VARCHAR(20) NOT NULL,\n BOOKING_CHANNEL VARCHAR(20),\n REFERRAL_FLAG BOOLEAN,\n REFERRAL_SOURCE VARCHAR(50),\n CHECKIN_TS TIMESTAMP,\n CHECKOUT_TS TIMESTAMP,\n WAIT_TIME_MINUTES INT,\n DURATION_MINUTES INT,\n NO_SHOW_FLAG BOOLEAN,\n CANCELLATION_REASON VARCHAR(80),\n RESCHEDULED_FLAG BOOLEAN,\n COPAY_AMOUNT DECIMAL(12,2),\n INSURANCE_VERIFIED_FLAG BOOLEAN,\n VISIT_REASON VARCHAR(255),\n CREATED_AT TIMESTAMP NOT NULL,\n UPDATED_AT TIMESTAMP,\n FOREIGN KEY (PATIENT_ID) REFERENCES PATIENT_MASTER(PATIENT_ID),\n FOREIGN KEY (PROVIDER_ID) REFERENCES PROVIDERS(PROVIDER_ID),\n FOREIGN KEY (FACILITY_ID) REFERENCES FACILITIES(FACILITY_ID)\n);",
"instructions": "PATIENT_ID, PROVIDER_ID, and FACILITY_ID must always reference existing parent rows. APPOINTMENT_TYPE in {NEW_PATIENT,FOLLOW_UP,CONSULT,ANNUAL_VISIT,PROCEDURE,TELEHEALTH} with 18/32/12/18/10/10 split. APPOINTMENT_STATUS in {COMPLETED,SCHEDULED,CANCELED,NO_SHOW,RESCHEDULED} with 58/12/12/8/10 split. If APPOINTMENT_STATUS=NO_SHOW then NO_SHOW_FLAG=true. CHECKOUT_TS should be after CHECKIN_TS when both are present.",
"columns_list": []
},
{
"table_id": "bf7e9499-0e25-4eeb-ae08-1c70d0a4d466",
"table_name": "PATIENT_ENCOUNTERS",
"num_entries": 200000,
"ddl": "CREATE TABLE PATIENT_ENCOUNTERS (\n ENCOUNTER_ID BIGINT PRIMARY KEY,\n PATIENT_ID BIGINT NOT NULL,\n MRN VARCHAR(20) UNIQUE NOT NULL,\n FIRST_NAME VARCHAR(50) NOT NULL,\n MIDDLE_NAME VARCHAR(50),\n LAST_NAME VARCHAR(50) NOT NULL,\n DATE_OF_BIRTH DATE NOT NULL,\n GENDER VARCHAR(20) NOT NULL,\n PHONE_HOME VARCHAR(20),\n PHONE_MOBILE VARCHAR(20),\n EMAIL VARCHAR(120),\n ADDRESS_LINE1 VARCHAR(120),\n ADDRESS_LINE2 VARCHAR(120),\n CITY VARCHAR(60),\n STATE_CODE CHAR(2),\n ZIP_CODE VARCHAR(10),\n COUNTRY_CODE CHAR(2),\n MARITAL_STATUS VARCHAR(20),\n LANGUAGE_CODE VARCHAR(10),\n RACE VARCHAR(30),\n ETHNICITY VARCHAR(30),\n PATIENT_STATUS VARCHAR(20),\n PRIMARY_CARE_PROVIDER VARCHAR(100),\n GUARANTOR_NAME VARCHAR(100),\n EMERGENCY_CONTACT_NAME VARCHAR(100),\n EMERGENCY_CONTACT_PHONE VARCHAR(20),\n EMPLOYER_NAME VARCHAR(100),\n OCCUPATION VARCHAR(80),\n HEIGHT_CM DECIMAL(6,2),\n WEIGHT_KG DECIMAL(6,2),\n BMI DECIMAL(5,2),\n BLOOD_TYPE VARCHAR(5),\n ALLERGY_SUMMARY VARCHAR(255),\n SMOKING_STATUS VARCHAR(20),\n ALCOHOL_USE VARCHAR(20),\n PREGNANCY_STATUS VARCHAR(20),\n CHRONIC_CONDITION_FLAG BOOLEAN,\n DIABETES_FLAG BOOLEAN,\n HYPERTENSION_FLAG BOOLEAN,\n ASTHMA_FLAG BOOLEAN,\n HEART_DISEASE_FLAG BOOLEAN,\n ENCOUNTER_DATE TIMESTAMP NOT NULL,\n ADMISSION_DATE TIMESTAMP,\n DISCHARGE_DATE TIMESTAMP,\n ENCOUNTER_TYPE VARCHAR(30),\n VISIT_REASON VARCHAR(255),\n CHIEF_COMPLAINT VARCHAR(255),\n DEPARTMENT VARCHAR(50) NOT NULL,\n SUB_DEPARTMENT VARCHAR(50),\n FACILITY_NAME VARCHAR(100),\n FACILITY_ID VARCHAR(30),\n ROOM_NUMBER VARCHAR(20),\n BED_NUMBER VARCHAR(20),\n PROVIDER_ID VARCHAR(30),\n PROVIDER_NAME VARCHAR(100),\n ATTENDING_PHYSICIAN VARCHAR(100),\n REFERRING_PHYSICIAN VARCHAR(100),\n NURSE_NAME VARCHAR(100),\n TRIAGE_LEVEL VARCHAR(20),\n APPOINTMENT_ID VARCHAR(30),\n APPOINTMENT_STATUS VARCHAR(20),\n CHECKIN_TIME TIMESTAMP,\n CHECKOUT_TIME TIMESTAMP,\n WAIT_TIME_MINUTES INT,\n LENGTH_OF_STAY_HOURS DECIMAL(8,2),\n IS_FOLLOW_UP BOOLEAN NOT NULL,\n READMISSION_FLAG 
BOOLEAN,\n TELEHEALTH_FLAG BOOLEAN,\n NO_SHOW_FLAG BOOLEAN,\n DISCHARGE_DISPOSITION VARCHAR(50),\n DIAGNOSIS_CODE_1 VARCHAR(10),\n DIAGNOSIS_DESC_1 VARCHAR(255),\n DIAGNOSIS_CODE_2 VARCHAR(10),\n DIAGNOSIS_DESC_2 VARCHAR(255),\n DIAGNOSIS_CODE_3 VARCHAR(10),\n DIAGNOSIS_DESC_3 VARCHAR(255),\n PROCEDURE_CODE_1 VARCHAR(15),\n PROCEDURE_DESC_1 VARCHAR(255),\n PROCEDURE_CODE_2 VARCHAR(15),\n PROCEDURE_DESC_2 VARCHAR(255),\n PROCEDURE_CODE_3 VARCHAR(15),\n PROCEDURE_DESC_3 VARCHAR(255),\n MEDICATION_1 VARCHAR(100),\n MEDICATION_1_DOSE VARCHAR(50),\n MEDICATION_1_ROUTE VARCHAR(30),\n MEDICATION_2 VARCHAR(100),\n MEDICATION_2_DOSE VARCHAR(50),\n MEDICATION_2_ROUTE VARCHAR(30),\n MEDICATION_3 VARCHAR(100),\n MEDICATION_3_DOSE VARCHAR(50),\n MEDICATION_3_ROUTE VARCHAR(30),\n LAB_ORDER_ID VARCHAR(30),\n LAB_PANEL_NAME VARCHAR(100),\n LAB_RESULT_STATUS VARCHAR(20),\n HEMOGLOBIN_VALUE DECIMAL(6,2),\n WBC_COUNT DECIMAL(10,2),\n PLATELET_COUNT DECIMAL(10,2),\n GLUCOSE_MG_DL DECIMAL(8,2),\n CREATININE_MG_DL DECIMAL(8,2),\n SODIUM_MMOL_L DECIMAL(8,2),\n POTASSIUM_MMOL_L DECIMAL(8,2),\n CHLORIDE_MMOL_L DECIMAL(8,2),\n BUN_MG_DL DECIMAL(8,2),\n ALT_U_L DECIMAL(8,2),\n AST_U_L DECIMAL(8,2),\n TOTAL_CHOLESTEROL_MG_DL DECIMAL(8,2),\n HDL_MG_DL DECIMAL(8,2),\n LDL_MG_DL DECIMAL(8,2),\n TRIGLYCERIDES_MG_DL DECIMAL(8,2),\n A1C_PERCENT DECIMAL(5,2),\n COVID_TEST_RESULT VARCHAR(20),\n FLU_TEST_RESULT VARCHAR(20),\n PULSE INT,\n RESPIRATORY_RATE INT,\n TEMPERATURE_C DECIMAL(5,2),\n OXYGEN_SATURATION DECIMAL(5,2),\n SYSTOLIC_BP INT,\n DIASTOLIC_BP INT,\n PAIN_SCORE INT,\n FALL_RISK_SCORE INT,\n BILL_AMOUNT DECIMAL(12,2),\n ALLOWED_AMOUNT DECIMAL(12,2),\n PAID_AMOUNT DECIMAL(12,2),\n PATIENT_RESPONSIBILITY DECIMAL(12,2),\n COPAY_AMOUNT DECIMAL(12,2),\n COINSURANCE_AMOUNT DECIMAL(12,2),\n DEDUCTIBLE_AMOUNT DECIMAL(12,2),\n ADJUSTMENT_AMOUNT DECIMAL(12,2),\n WRITE_OFF_AMOUNT DECIMAL(12,2),\n OUTSTANDING_BALANCE DECIMAL(12,2),\n PAYMENT_PLAN_FLAG BOOLEAN,\n PAYMENT_STATUS VARCHAR(20),\n CLAIM_ID 
VARCHAR(30),\n CLAIM_STATUS VARCHAR(20),\n CLAIM_SUBMITTED_DATE DATE,\n CLAIM_PAID_DATE DATE,\n CLAIM_DENIAL_REASON VARCHAR(100),\n REVENUE_CODE VARCHAR(10),\n DRG_CODE VARCHAR(10),\n CPT_HCPCS_CODE VARCHAR(15),\n ICD_VERSION VARCHAR(10),\n INSURANCE_PLAN VARCHAR(50),\n PAYER_NAME VARCHAR(100),\n PAYER_TYPE VARCHAR(30),\n POLICY_NUMBER VARCHAR(40),\n GROUP_NUMBER VARCHAR(40),\n SUBSCRIBER_ID VARCHAR(40),\n SUBSCRIBER_RELATIONSHIP VARCHAR(30),\n PRIOR_AUTH_NUMBER VARCHAR(40),\n COVERAGE_EFFECTIVE_DATE DATE,\n COVERAGE_END_DATE DATE,\n NETWORK_STATUS VARCHAR(20),\n REFERRAL_REQUIRED_FLAG BOOLEAN,\n CASE_MANAGER_NAME VARCHAR(100),\n CARE_GAP_FLAG BOOLEAN,\n RISK_SCORE DECIMAL(6,2),\n SEVERITY_SCORE DECIMAL(6,2),\n QUALITY_SCORE DECIMAL(6,2),\n SATISFACTION_SCORE DECIMAL(6,2),\n TRANSPORT_MODE VARCHAR(30),\n ARRIVAL_SOURCE VARCHAR(30),\n DISCHARGE_INSTRUCTIONS VARCHAR(255),\n FOLLOW_UP_DUE_DATE DATE,\n NEXT_APPOINTMENT_DATE DATE,\n PORTAL_ENROLLED_FLAG BOOLEAN,\n SMS_CONSENT_FLAG BOOLEAN,\n EMAIL_CONSENT_FLAG BOOLEAN,\n DATA_SOURCE_SYSTEM VARCHAR(50),\n IMPORT_BATCH_ID VARCHAR(40),\n RECORD_VERSION INT,\n IS_ACTIVE BOOLEAN,\n IS_DELETED BOOLEAN,\n CREATED_AT TIMESTAMP NOT NULL,\n UPDATED_AT TIMESTAMP,\n CREATED_BY VARCHAR(50),\n UPDATED_BY VARCHAR(50),\n CUSTOM_ATTR_001 VARCHAR(100),\n CUSTOM_ATTR_002 VARCHAR(100),\n CUSTOM_ATTR_003 VARCHAR(100),\n CUSTOM_ATTR_004 VARCHAR(100),\n CUSTOM_ATTR_005 VARCHAR(100),\n CUSTOM_ATTR_006 VARCHAR(100),\n CUSTOM_ATTR_007 VARCHAR(100),\n CUSTOM_ATTR_008 VARCHAR(100),\n CUSTOM_ATTR_009 VARCHAR(100),\n CUSTOM_ATTR_010 VARCHAR(100),\n CUSTOM_NUM_001 DECIMAL(12,2),\n CUSTOM_NUM_002 DECIMAL(12,2),\n CUSTOM_NUM_003 DECIMAL(12,2),\n CUSTOM_NUM_004 DECIMAL(12,2),\n CUSTOM_NUM_005 DECIMAL(12,2),\n CUSTOM_DATE_001 DATE,\n CUSTOM_DATE_002 DATE,\n CUSTOM_DATE_003 DATE,\n CUSTOM_FLAG_001 BOOLEAN,\n CUSTOM_FLAG_002 BOOLEAN,\n CUSTOM_FLAG_003 BOOLEAN,\n CUSTOM_FLAG_004 BOOLEAN,\n NOTES VARCHAR(500),\n RECORD_HASH VARCHAR(128),\n FOREIGN KEY 
(PATIENT_ID) REFERENCES PATIENT_MASTER(PATIENT_ID),\n FOREIGN KEY (FACILITY_ID) REFERENCES FACILITIES(FACILITY_ID),\n FOREIGN KEY (PROVIDER_ID) REFERENCES PROVIDERS(PROVIDER_ID)\n);",
"instructions": "Generate realistic large-scale US healthcare encounter records for this wide 200-column encounter fact table. Honor all DDL datatypes and keep referential integrity to PATIENT_MASTER, FACILITIES, and PROVIDERS.\nGENDER in {MALE,FEMALE,OTHER} with 48/50/2 split.\nDEPARTMENT in {EMERGENCY,GENERAL_MEDICINE,CARDIOLOGY,ORTHOPEDICS,ONCOLOGY,PEDIATRICS} with 20/25/15/15/10/15 split.\nENCOUNTER_TYPE in {INPATIENT,OUTPATIENT,ED,OBSERVATION,TELEHEALTH} with 18/42/20/8/12 split.\nCLAIM_STATUS in {PAID,PENDING,DENIED,ADJUDICATING} with 65/15/8/12 split.\nPAYMENT_STATUS in {PAID,PARTIAL,PENDING,OVERDUE} with 60/10/20/10 split.\nPHONE_MOBILE and PHONE_HOME should use +1-XXX-XXX-XXXX format.\nENCOUNTER_DATE must be after DATE_OF_BIRTH and within the last 3 years.\nIf DISCHARGE_DATE is populated it must be on or after ADMISSION_DATE.\nBILL_AMOUNT, ALLOWED_AMOUNT, PAID_AMOUNT, and OUTSTANDING_BALANCE must be numerically consistent and non-negative.",
"columns_list": []
},
{
"table_id": "fa50bdbd-15e5-41a9-b693-8492bc94d039",
"table_name": "ENCOUNTER_DIAGNOSES",
"num_entries": 850000,
"ddl": "CREATE TABLE ENCOUNTER_DIAGNOSES (\n ENCOUNTER_ID BIGINT NOT NULL,\n DIAGNOSIS_SEQUENCE INT NOT NULL,\n PATIENT_ID BIGINT NOT NULL,\n DIAGNOSIS_CODE VARCHAR(10) NOT NULL,\n DIAGNOSIS_DESCRIPTION VARCHAR(255) NOT NULL,\n DIAGNOSIS_TYPE VARCHAR(20) NOT NULL,\n DIAGNOSIS_GROUP VARCHAR(50),\n PRESENT_ON_ADMISSION_FLAG BOOLEAN NOT NULL,\n CHRONIC_FLAG BOOLEAN NOT NULL,\n SEVERITY_LEVEL VARCHAR(20),\n RISK_ADJUSTABLE_FLAG BOOLEAN,\n PRIMARY_DIAGNOSIS_FLAG BOOLEAN,\n BILLABLE_FLAG BOOLEAN,\n ONSET_DATE DATE,\n RESOLUTION_DATE DATE,\n DIAGNOSIS_DATE DATE NOT NULL,\n CODING_SYSTEM VARCHAR(10) NOT NULL,\n CODER_ID VARCHAR(30),\n AUDIT_STATUS VARCHAR(20),\n CLAIM_LINKED_FLAG BOOLEAN,\n SOURCE_SYSTEM VARCHAR(30),\n CREATED_AT TIMESTAMP NOT NULL,\n UPDATED_AT TIMESTAMP,\n PRIMARY KEY (ENCOUNTER_ID, DIAGNOSIS_SEQUENCE),\n FOREIGN KEY (ENCOUNTER_ID) REFERENCES PATIENT_ENCOUNTERS(ENCOUNTER_ID),\n FOREIGN KEY (PATIENT_ID) REFERENCES PATIENT_MASTER(PATIENT_ID)\n);",
"instructions": "ENCOUNTER_ID must reference PATIENT_ENCOUNTERS and PATIENT_ID should match the patient on that encounter. DIAGNOSIS_TYPE in {PRIMARY,SECONDARY,ADMITTING,FINAL} with 30/35/15/20 split. CODING_SYSTEM in {ICD10,ICD9} with 95/5 split. PRESENT_ON_ADMISSION_FLAG should usually be true for admitting diagnoses. Composite key (ENCOUNTER_ID, DIAGNOSIS_SEQUENCE) must be unique.",
"columns_list": []
},
{
"table_id": "f5bcc7e7-7d40-48ca-86f4-d0d6ce8940e6",
"table_name": "ENCOUNTER_PROCEDURES",
"num_entries": 500000,
"ddl": "CREATE TABLE ENCOUNTER_PROCEDURES (\n ENCOUNTER_ID BIGINT NOT NULL,\n PROCEDURE_SEQUENCE INT NOT NULL,\n PATIENT_ID BIGINT NOT NULL,\n PROCEDURE_CODE VARCHAR(15) NOT NULL,\n PROCEDURE_DESCRIPTION VARCHAR(255) NOT NULL,\n PROCEDURE_TYPE VARCHAR(30) NOT NULL,\n PERFORMING_PROVIDER_ID VARCHAR(30),\n ASSISTING_PROVIDER_ID VARCHAR(30),\n PROCEDURE_DATE DATE NOT NULL,\n PROCEDURE_START_TS TIMESTAMP,\n PROCEDURE_END_TS TIMESTAMP,\n ANESTHESIA_TYPE VARCHAR(30),\n MODIFIER_1 VARCHAR(10),\n MODIFIER_2 VARCHAR(10),\n LATERALITY VARCHAR(10),\n BODY_SITE VARCHAR(50),\n STATUS VARCHAR(20),\n COMPLICATION_FLAG BOOLEAN,\n DEVICE_IMPLANTED_FLAG BOOLEAN,\n SUPPLY_COST DECIMAL(12,2),\n PROCEDURE_COST DECIMAL(12,2),\n CREATED_AT TIMESTAMP NOT NULL,\n UPDATED_AT TIMESTAMP,\n PRIMARY KEY (ENCOUNTER_ID, PROCEDURE_SEQUENCE),\n FOREIGN KEY (ENCOUNTER_ID) REFERENCES PATIENT_ENCOUNTERS(ENCOUNTER_ID),\n FOREIGN KEY (PATIENT_ID) REFERENCES PATIENT_MASTER(PATIENT_ID),\n FOREIGN KEY (PERFORMING_PROVIDER_ID) REFERENCES PROVIDERS(PROVIDER_ID),\n FOREIGN KEY (ASSISTING_PROVIDER_ID) REFERENCES PROVIDERS(PROVIDER_ID)\n);",
"instructions": "ENCOUNTER_ID and PATIENT_ID must align to the same encounter. PROCEDURE_TYPE in {SURGICAL,DIAGNOSTIC,THERAPEUTIC,IMAGING,LAB} with 18/22/20/20/20 split. STATUS in {COMPLETED,CANCELED,SCHEDULED,ABORTED} with 82/6/8/4 split. If PROCEDURE_END_TS is populated it must be after PROCEDURE_START_TS. SUPPLY_COST and PROCEDURE_COST must be non-negative.",
"columns_list": []
},
{
"table_id": "e61eae13-c587-47e2-9788-17b123e18171",
"table_name": "MEDICATION_ORDERS",
"num_entries": 650000,
"ddl": "CREATE TABLE MEDICATION_ORDERS (\n MED_ORDER_ID VARCHAR(30) PRIMARY KEY,\n ENCOUNTER_ID BIGINT NOT NULL,\n PATIENT_ID BIGINT NOT NULL,\n PROVIDER_ID VARCHAR(30) NOT NULL,\n MEDICATION_NAME VARCHAR(100) NOT NULL,\n GENERIC_NAME VARCHAR(100),\n DRUG_CODE VARCHAR(20),\n THERAPEUTIC_CLASS VARCHAR(50),\n ORDER_STATUS VARCHAR(20) NOT NULL,\n ROUTE VARCHAR(30),\n DOSE_VALUE DECIMAL(10,2),\n DOSE_UNIT VARCHAR(20),\n FREQUENCY VARCHAR(30),\n PRN_FLAG BOOLEAN,\n INDICATION VARCHAR(255),\n ORDER_TS TIMESTAMP NOT NULL,\n START_TS TIMESTAMP,\n END_TS TIMESTAMP,\n DISCONTINUE_REASON VARCHAR(80),\n PHARMACY_STATUS VARCHAR(20),\n ADMINISTRATION_COUNT INT,\n REFILL_COUNT INT,\n FORMULARY_FLAG BOOLEAN,\n HIGH_ALERT_FLAG BOOLEAN,\n CREATED_AT TIMESTAMP NOT NULL,\n UPDATED_AT TIMESTAMP,\n FOREIGN KEY (ENCOUNTER_ID) REFERENCES PATIENT_ENCOUNTERS(ENCOUNTER_ID),\n FOREIGN KEY (PATIENT_ID) REFERENCES PATIENT_MASTER(PATIENT_ID),\n FOREIGN KEY (PROVIDER_ID) REFERENCES PROVIDERS(PROVIDER_ID)\n);",
"instructions": "MED_ORDER_ID must be unique. ENCOUNTER_ID, PATIENT_ID, and PROVIDER_ID must reference existing parent rows. ORDER_STATUS in {ACTIVE,COMPLETED,DISCONTINUED,CANCELED,ON_HOLD} with 35/35/12/8/10 split. ROUTE in {PO,IV,IM,SUBQ,TOPICAL,INHALATION} with 35/25/10/10/8/12 split. If END_TS is populated it must be on or after START_TS.",
"columns_list": []
},
{
"table_id": "aa2922c5-4505-459a-b249-a1aa1abe47b8",
"table_name": "LAB_RESULTS",
"num_entries": 900000,
"ddl": "CREATE TABLE LAB_RESULTS (\n RESULT_ID VARCHAR(30) PRIMARY KEY,\n ENCOUNTER_ID BIGINT NOT NULL,\n PATIENT_ID BIGINT NOT NULL,\n ORDERING_PROVIDER_ID VARCHAR(30),\n LAB_PANEL_NAME VARCHAR(100),\n TEST_CODE VARCHAR(20) NOT NULL,\n TEST_NAME VARCHAR(120) NOT NULL,\n SPECIMEN_TYPE VARCHAR(30),\n COLLECTION_TS TIMESTAMP,\n RECEIVED_TS TIMESTAMP,\n RESULT_TS TIMESTAMP,\n RESULT_STATUS VARCHAR(20) NOT NULL,\n RESULT_VALUE_NUM DECIMAL(14,4),\n RESULT_VALUE_TEXT VARCHAR(255),\n UNIT_OF_MEASURE VARCHAR(20),\n REFERENCE_RANGE_LOW DECIMAL(14,4),\n REFERENCE_RANGE_HIGH DECIMAL(14,4),\n ABNORMAL_FLAG VARCHAR(10),\n CRITICAL_FLAG BOOLEAN,\n PERFORMING_LAB_FACILITY_ID VARCHAR(30),\n SOURCE_SYSTEM VARCHAR(30),\n VERIFIED_BY VARCHAR(30),\n CREATED_AT TIMESTAMP NOT NULL,\n UPDATED_AT TIMESTAMP,\n FOREIGN KEY (ENCOUNTER_ID) REFERENCES PATIENT_ENCOUNTERS(ENCOUNTER_ID),\n FOREIGN KEY (PATIENT_ID) REFERENCES PATIENT_MASTER(PATIENT_ID),\n FOREIGN KEY (ORDERING_PROVIDER_ID) REFERENCES PROVIDERS(PROVIDER_ID),\n FOREIGN KEY (PERFORMING_LAB_FACILITY_ID) REFERENCES FACILITIES(FACILITY_ID)\n);",
"instructions": "RESULT_ID must be unique. RESULT_STATUS in {FINAL,PRELIMINARY,CORRECTED,CANCELED} with 78/12/4/6 split. ABNORMAL_FLAG in {NORMAL,HIGH,LOW,CRITICAL} with 72/12/10/6 split. If RESULT_VALUE_NUM is populated it should be consistent with reference ranges. RESULT_TS should be on or after COLLECTION_TS.",
"columns_list": []
},
{
"table_id": "8bbab751-c1fd-4948-b15f-eecafe542494",
"table_name": "CLAIMS",
"num_entries": 400000,
"ddl": "CREATE TABLE CLAIMS (\n CLAIM_ID VARCHAR(30) PRIMARY KEY,\n ENCOUNTER_ID BIGINT NOT NULL,\n PATIENT_ID BIGINT NOT NULL,\n FACILITY_ID VARCHAR(30) NOT NULL,\n BILLING_PROVIDER_ID VARCHAR(30),\n PAYER_NAME VARCHAR(100) NOT NULL,\n PAYER_TYPE VARCHAR(30),\n PLAN_NAME VARCHAR(100),\n MEMBER_ID VARCHAR(40),\n GROUP_NUMBER VARCHAR(40),\n CLAIM_TYPE VARCHAR(30) NOT NULL,\n CLAIM_STATUS VARCHAR(20) NOT NULL,\n SUBMISSION_DATE DATE,\n RECEIPT_DATE DATE,\n ADJUDICATION_DATE DATE,\n DENIAL_REASON VARCHAR(100),\n TOTAL_CHARGED DECIMAL(14,2),\n TOTAL_ALLOWED DECIMAL(14,2),\n TOTAL_PAID DECIMAL(14,2),\n PATIENT_RESPONSIBILITY DECIMAL(14,2),\n DRG_CODE VARCHAR(10),\n REVENUE_CODE VARCHAR(10),\n PLACE_OF_SERVICE VARCHAR(10),\n COB_FLAG BOOLEAN,\n PRIOR_AUTH_NUMBER VARCHAR(40),\n CREATED_AT TIMESTAMP NOT NULL,\n UPDATED_AT TIMESTAMP,\n FOREIGN KEY (ENCOUNTER_ID) REFERENCES PATIENT_ENCOUNTERS(ENCOUNTER_ID),\n FOREIGN KEY (PATIENT_ID) REFERENCES PATIENT_MASTER(PATIENT_ID),\n FOREIGN KEY (FACILITY_ID) REFERENCES FACILITIES(FACILITY_ID),\n FOREIGN KEY (BILLING_PROVIDER_ID) REFERENCES PROVIDERS(PROVIDER_ID)\n);",
"instructions": "CLAIM_ID must be unique. ENCOUNTER_ID, PATIENT_ID, FACILITY_ID, and BILLING_PROVIDER_ID must reference existing parent rows. CLAIM_TYPE in {PROFESSIONAL,INSTITUTIONAL,PHARMACY} with 45/45/10 split. CLAIM_STATUS in {PAID,PENDING,DENIED,ADJUDICATING,VOIDED} with 62/14/10/10/4 split. TOTAL_ALLOWED should not exceed TOTAL_CHARGED. TOTAL_PAID and PATIENT_RESPONSIBILITY should be non-negative.",
"columns_list": []
}
],
"schema_gen_status": "ERROR",
"schema_gen_last_update": "2026-04-16 17:12:20",
"schema_gen_log": "Generated data script failed. File: C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Healthcare_10_Table_Scale\\code_generated\\generated_code.py. Details: Command '['C:\\\\Users\\\\aakas\\\\Desktop\\\\DLS\\\\Datagenx\\\\datagenx\\\\.venv\\\\Scripts\\\\python.exe', 'C:\\\\Users\\\\aakas\\\\Desktop\\\\DLS\\\\Datagenx\\\\datagenx\\\\schemas\\\\Healthcare_10_Table_Scale\\\\code_generated\\\\generated_code.py']' returned non-zero exit status 3221225786. | Log: C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Healthcare_10_Table_Scale\\logs\\latest.log",
"dg_code_gen_status": "DONE",
"dg_code_gen_at": "2026-04-15 18:47:57",
"dg_code_gen_log": "Code generated successfully.",
"dg_bulkdata_gen_status": "ERROR",
"dg_bulkdata_gen_at": "2026-04-15 18:47:57",
"dg_bulkdata_gen_log": "Running generated script.",
"dg_sf_upload_status": "NEW",
"dg_sf_upload_at": "2026-04-15 17:31:20",
"dg_sf_upload_log": "Generated data script failed. File: C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Healthcare_10_Table_Scale\\code_generated\\generated_code.py. Details: Command '['C:\\\\Users\\\\aakas\\\\Desktop\\\\DLS\\\\Datagenx\\\\datagenx\\\\.venv\\\\Scripts\\\\python.exe', 'C:\\\\Users\\\\aakas\\\\Desktop\\\\DLS\\\\Datagenx\\\\datagenx\\\\schemas\\\\Healthcare_10_Table_Scale\\\\code_generated\\\\generated_code.py']' returned non-zero exit status 3221225786. | Log: C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Healthcare_10_Table_Scale\\logs\\latest.log",
"last_run_log_path": "C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Healthcare_10_Table_Scale\\logs\\latest.log",
"last_error_trace": "Generated data script failed. File: C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Healthcare_10_Table_Scale\\code_generated\\generated_code.py. Details: Command '['C:\\\\Users\\\\aakas\\\\Desktop\\\\DLS\\\\Datagenx\\\\datagenx\\\\.venv\\\\Scripts\\\\python.exe', 'C:\\\\Users\\\\aakas\\\\Desktop\\\\DLS\\\\Datagenx\\\\datagenx\\\\schemas\\\\Healthcare_10_Table_Scale\\\\code_generated\\\\generated_code.py']' returned non-zero exit status 3221225786.\nTraceback (most recent call last):\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\data_generation_backend.py\", line 1636, in generate_schema_data\n run = subprocess.run(\n ^^^^^^^^^^^^^^^\n File \"C:\\Users\\aakas\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\subprocess.py\", line 571, in run\n raise CalledProcessError(retcode, process.args,\nsubprocess.CalledProcessError: Command '['C:\\\\Users\\\\aakas\\\\Desktop\\\\DLS\\\\Datagenx\\\\datagenx\\\\.venv\\\\Scripts\\\\python.exe', 'C:\\\\Users\\\\aakas\\\\Desktop\\\\DLS\\\\Datagenx\\\\datagenx\\\\schemas\\\\Healthcare_10_Table_Scale\\\\code_generated\\\\generated_code.py']' returned non-zero exit status 3221225786.\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\data_generation_backend.py\", line 1702, in generate_schema_data\n raise RuntimeError(\nRuntimeError: Generated data script failed. File: C:\\Users\\aakas\\Desktop\\DLS\\Datagenx\\datagenx\\schemas\\Healthcare_10_Table_Scale\\code_generated\\generated_code.py. Details: Command '['C:\\\\Users\\\\aakas\\\\Desktop\\\\DLS\\\\Datagenx\\\\datagenx\\\\.venv\\\\Scripts\\\\python.exe', 'C:\\\\Users\\\\aakas\\\\Desktop\\\\DLS\\\\Datagenx\\\\datagenx\\\\schemas\\\\Healthcare_10_Table_Scale\\\\code_generated\\\\generated_code.py']' returned non-zero exit status 3221225786.\n"
}
],
"_copyright_notice": [
"Copyright (c) 2026 DataLake Solutions. All rights reserved.",
"This source code and all related materials are proprietary to DataLake Solutions.",
"You may not, without prior written permission from DataLake Solutions, copy, modify, distribute, sublicense, sell, publish, or otherwise disclose this code.",
"You may not share this code with third parties or post it to public repositories, forums, or websites.",
"You may not use this code to create derivative works for external distribution or commercial exploitation.",
"Unauthorized use, disclosure, or distribution is strictly prohibited."
]
}