diff --git a/Dockerfile b/Dockerfile index 2eed37db..e90b8250 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,33 +1,27 @@ FROM python:3.8-slim -RUN set -ex \ - && apt-get -y update && apt-get -y upgrade \ - && apt install python3-pip -y \ - && apt install curl -y \ +# Install dependencies +RUN apt-get update && apt-get install -y libsasl2-dev curl gcc libldap2-dev \ && DOCKER_CONFIG=${DOCKER_CONFIG:-$HOME/.docker} \ && mkdir -p $DOCKER_CONFIG/cli-plugins \ && curl -SL https://github.com/docker/compose/releases/download/v2.2.3/docker-compose-linux-x86_64 -o $DOCKER_CONFIG/cli-plugins/docker-compose \ && chmod +x $DOCKER_CONFIG/cli-plugins/docker-compose - -ADD . /datahub +# Set the working directory and copy the application files WORKDIR /datahub +COPY . /datahub -# RUN poetry init -RUN python3 -m pip install --upgrade pip -RUN pip install -r requirements.txt - -# RUN python manage.py makemigrations \ -# && python manage.py migrate \ -# && python manage.py loaddata db_scripts/userrole_fixture.yaml \ -# && python manage.py loaddata db_scripts/initial_data.yaml +# Upgrade pip and install required Python packages +RUN python -m pip install --upgrade pip \ + && pip install python-ldap==3.3.1 \ + && pip install --upgrade pyopenssl \ + && pip install -r requirements.txt -ENV PYTHONUNBUFFERED 1 -# ENV VIRTUAL_ENV /env - -# ENV PATH /env/bin:$PATH +# Set environment variables +ENV PYTHONUNBUFFERED 1 +# Expose port 8000 for the Django app EXPOSE 8000 - +# Command to run the Django development server CMD ["python", "manage.py", "runserver", "0.0.0.0:8000"] diff --git a/datahub/migrations/0041_resource_resourcefile.py b/datahub/migrations/0041_resource_resourcefile.py new file mode 100644 index 00000000..1beaecf1 --- /dev/null +++ b/datahub/migrations/0041_resource_resourcefile.py @@ -0,0 +1,95 @@ +# Generated by Django 4.1.5 on 2023-12-07 08:17 + +from django.db import migrations, models +import django.db.models.deletion +import uuid + + +class Migration(migrations.Migration): + + dependencies = [ + ("datahub", "0040_alter_datasetv2_name_alter_policy_description_and_more"), + ] + + operations = [ + migrations.CreateModel( + name="Resource", + fields=[ + ("created_at", models.DateTimeField(auto_now_add=True)), + ("updated_at", models.DateTimeField(auto_now=True)), + ( + "id", + models.UUIDField( + default=uuid.uuid4, + editable=False, + primary_key=True, + serialize=False, + ), + ), + ("title", models.CharField(max_length=100)), + ("description", models.TextField(max_length=250)), + ("category", models.JSONField(default=dict)), + ( + "user_map", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="datahub.userorganizationmap", + ), + ), + ], + options={ + "abstract": False, + }, + ), + migrations.CreateModel( + name="ResourceFile", + fields=[ + ("created_at", models.DateTimeField(auto_now_add=True)), + ("updated_at", models.DateTimeField(auto_now=True)), + ( + "id", + models.UUIDField( + default=uuid.uuid4, + editable=False, + primary_key=True, + serialize=False, + ), + ), + ( + "file", + models.FileField( + blank=True, null=True, upload_to="users/resources/" + ), + ), + ("file_size", models.PositiveIntegerField(blank=True, null=True)), + ( + "type", + models.CharField( + choices=[ + ("youtube", "youtube"), + ("pdf", "pdf"), + ("file", "file"), + ], + max_length=20, + null=True, + ), + ), + ("url", models.CharField(max_length=200, null=True)), + ( + "transcription", + models.CharField(blank=True, max_length=2500, null=True), + ), + ( + "resource", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="resources", + to="datahub.resource", + ), + ), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/datahub/migrations/0042_merge_20231207_0944.py b/datahub/migrations/0042_merge_20231207_0944.py new file mode 100644 index 00000000..263ad5e6 --- /dev/null +++ b/datahub/migrations/0042_merge_20231207_0944.py @@ -0,0 +1,16 @@ +# Generated by Django 4.1.5 on 2023-12-07 09:44 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ( + "datahub", + "0041_langchainpgcollection_langchainpgembedding_resource_and_more", + ), + ("datahub", "0041_resource_resourcefile"), + ] + + operations = [] diff --git a/datahub/migrations/0043_delete_langchainpgcollection_and_more.py b/datahub/migrations/0043_delete_langchainpgcollection_and_more.py new file mode 100644 index 00000000..6bbf8bf3 --- /dev/null +++ b/datahub/migrations/0043_delete_langchainpgcollection_and_more.py @@ -0,0 +1,28 @@ +# Generated by Django 4.1.5 on 2023-12-07 09:44 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("datahub", "0042_merge_20231207_0944"), + ] + + operations = [ + migrations.DeleteModel( + name="LangchainPgCollection", + ), + migrations.DeleteModel( + name="LangchainPgEmbedding", + ), + migrations.AlterField( + model_name="resourcefile", + name="type", + field=models.CharField( + choices=[("youtube", "youtube"), ("pdf", "pdf"), ("file", "file")], + max_length=20, + null=True, + ), + ), + ] diff --git a/datahub/migrations/0044_alter_resource_description_alter_resource_title.py b/datahub/migrations/0044_alter_resource_description_alter_resource_title.py new file mode 100644 index 00000000..de4b1eb2 --- /dev/null +++ b/datahub/migrations/0044_alter_resource_description_alter_resource_title.py @@ -0,0 +1,23 @@ +# Generated by Django 4.1.5 on 2023-12-19 18:56 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("datahub", "0043_delete_langchainpgcollection_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="resource", + name="description", + field=models.TextField(max_length=500), + ), + migrations.AlterField( + model_name="resource", + name="title", + field=models.CharField(max_length=200), + ), + ] diff --git a/datahub/models.py b/datahub/models.py index c0c87430..b0e0db1e 100644 --- a/datahub/models.py +++ b/datahub/models.py @@ -296,7 +296,9 @@ def __str__(self) -> str: RESOURCE_URL_TYPE = ( ("youtube", "youtube"), - ("pdf", "pdf") + ("pdf", "pdf"), + ("file", "file") + ) class ResourceFile(TimeStampMixin): @@ -315,34 +317,31 @@ class ResourceFile(TimeStampMixin): def __str__(self) -> str: return self.file.name -from pgvector.django import VectorField +# from pgvector.django import VectorField # class ResourceVector(TimeStampMixin): # resource_file = models.ForeignKey(ResourceFile, on_delete=models.CASCADE, related_name="resource_file") -class LangchainPgCollection(models.Model): - name = models.UUIDField() - cmetadata = models.JSONField() - uuid = models.UUIDField(primary_key=True) - - class Meta: - db_table = 'langchain_pg_collection' - +# class LangchainPgCollection(models.Model): +# name = models.UUIDField() +# cmetadata = models.JSONField() +# uuid = models.UUIDField(primary_key=True) -class LangchainPgEmbedding(models.Model): - # resource_file = models.ForeignKey(ResourceFile, on_delete=models.CASCADE) - collection_id = models.UUIDField() - embedding = VectorField(1563) # Assuming 'vector' is a custom PostgreSQL data type - document = models.TextField() - cmetadata = models.JSONField() - custom_id = models.CharField(max_length=255) - uuid = models.UUIDField(primary_key=True) +# class Meta: +# db_table = 'langchain_pg_collection' - class Meta: - db_table = 'langchain_pg_embedding' - - def __str__(self): - return f"LangchainPgEmbedding(uuid={self.uuid}, document={self.document})" +# class LangchainPgEmbedding(models.Model): +# # resource_file = models.ForeignKey(ResourceFile, on_delete=models.CASCADE) +# collection_id = models.UUIDField() +# embedding = VectorField(1563) # Assuming 'vector' is a custom PostgreSQL data type +# document = models.TextField() +# cmetadata = models.JSONField() +# custom_id = models.CharField(max_length=255) +# uuid = models.UUIDField(primary_key=True) +# class Meta: +# db_table = 'langchain_pg_embedding' +# def __str__(self): +# return f"LangchainPgEmbedding(uuid={self.uuid}, document={self.document})" diff --git a/datahub/serializers.py b/datahub/serializers.py index ab4e4898..9a62f561 100644 --- a/datahub/serializers.py +++ b/datahub/serializers.py @@ -8,6 +8,7 @@ from django.conf import settings from django.core.exceptions import ObjectDoesNotExist, ValidationError from django.core.validators import URLValidator +from django.db.models import Count, Q from django.utils.translation import gettext as _ from rest_framework import serializers, status from django.db.models import Count diff --git a/requirements.txt b/requirements.txt index 8959bc00..bec98c24 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,11 @@ +aiohttp==3.9.1 +aiosignal==1.3.1 +anyio==3.7.1 asgiref==3.6.0 astroid==2.13.2 +async-timeout==4.0.3 asyncio==3.4.3 attrs==21.4.0 -backports.zoneinfo==0.2.1 CacheControl==0.12.11 cachy==0.3.0 certifi==2022.5.18.1 @@ -11,13 +14,16 @@ charset-normalizer==2.0.12 cleo==0.8.1 click==8.1.3 clikit==0.6.2 +configobj==5.0.8 coreapi==2.3.3 coreschema==0.0.4 coverage==7.0.3 crashtest==0.3.1 cryptography==37.0.2 cssutils==2.6.0 +dataclasses-json==0.6.3 dateutils==0.6.12 +deprecation==2.1.0 dill==0.3.6 distlib==0.3.4 Django==4.1.5 @@ -31,6 +37,7 @@ django-rest-framework-braces==0.3.4 django_debug_toolbar==3.8.1 djangorestframework==3.13.1 djangorestframework-simplejwt==5.2.2 +docker==6.1.3 drf-generators==0.5.0 drf-spectacular==0.22.1 drf-spectacular-sidecar==2022.7.1 @@ -41,6 +48,8 @@ factory-boy==3.2.1 Faker==16.6.0 filelock==3.9.0 filetype==1.1.0 +frozenlist==1.4.0 +future==0.18.3 html5lib==1.1 idna==3.3 importlib-metadata==6.0.0 @@ -52,25 +61,34 @@ itypes==1.2.0 jaraco.classes==3.2.3 jeepney==0.8.0 Jinja2==3.1.2 +jsonpatch==1.33 +jsonpointer==2.4 jsonschema==4.17.3 keyring==23.13.1 +langchain==0.0.346 +langchain-core==0.0.10 +langsmith==0.0.69 lazy-object-proxy==1.7.1 lockfile==0.12.2 MarkupSafe==2.1.1 +marshmallow==3.20.1 mccabe==0.7.0 model-bakery==1.9.0 more-itertools==9.0.0 msgpack==1.0.4 +multidict==6.0.4 mypy-extensions==0.4.3 mysql-connector-python==8.0.28 nose==1.3.7 numpy==1.24.1 +openai==0.28.1 openpyxl==3.0.10 packaging==20.9 pandas==1.5.2 pastel==0.2.1 pathspec==0.9.0 pexpect==4.8.0 +pgvector==0.2.4 phonenumbers==8.13.15 Pillow==9.4.0 pkginfo==1.8.2 @@ -81,10 +99,13 @@ pluggy==1.0.0 poetry==1.1.13 poetry-core==1.0.8 protobuf==3.20.1 +psycopg==3.1.14 psycopg2-binary==2.9.3 ptyprocess==0.7.0 py==1.11.0 pyaml==21.10.1 +pyasn1==0.5.1 +pyasn1-modules==0.3.0 pycparser==2.21 pydantic==1.9.2 PyJWT==2.4.0 @@ -97,7 +118,10 @@ pytest==7.2.0 pytest-django==4.5.2 pytest-factoryboy==2.5.1 python-dateutil==2.8.2 +python-dotenv==1.0.0 +python-gnupg==0.5.1 python-http-client==3.3.7 +python-ldap==3.4.4 python-magic==0.4.27 python-on-whales==0.55.0 pytz==2022.1 @@ -106,33 +130,33 @@ requests==2.27.1 requests-toolbelt==0.9.1 ruamel.yaml==0.17.21 ruamel.yaml.clib==0.2.6 +secrets==1.0.2 SecretStorage==3.3.2 sendgrid==6.9.7 shellingham==1.4.0 six==1.16.0 +sniffio==1.3.0 +SQLAlchemy==2.0.23 sqlparse==0.4.2 starkbank-ecdsa==2.0.3 +systematic==4.8.7 +tenacity==8.2.3 +testcontainers==3.7.1 tomli==2.0.1 tomlkit==0.11.0 tqdm==4.64.0 typer==0.6.1 +typing-inspect==0.9.0 typing_extensions==4.4.0 uritemplate==4.1.1 urllib3==1.26.9 virtualenv==20.14.1 webencodings==0.5.1 +websocket-client==1.7.0 wrapt==1.14.1 xlrd==2.0.1 xlwt==1.3.0 xmltodict==0.13.0 +yarl==1.9.3 zipp==3.11.0 -ruamel.yaml -testcontainers -sqlalchemy -secrets==1.0.2 -langchain -openai -python-dotenv -pgvector -psycopg -tiktokens \ No newline at end of file +