From 3b0247adaca45a753a73b4d26ae2c5336dcc9554 Mon Sep 17 00:00:00 2001 From: ugeshdg Date: Wed, 6 Dec 2023 17:23:03 +0530 Subject: [PATCH 01/12] file added in choices --- datahub/models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datahub/models.py b/datahub/models.py index c0c87430..c4636a1a 100644 --- a/datahub/models.py +++ b/datahub/models.py @@ -296,7 +296,9 @@ def __str__(self) -> str: RESOURCE_URL_TYPE = ( ("youtube", "youtube"), - ("pdf", "pdf") + ("pdf", "pdf"), + ("file", "file") + ) class ResourceFile(TimeStampMixin): From e0c7e056291647c881c2eb9ef24508fb705a75f9 Mon Sep 17 00:00:00 2001 From: ugeshdg Date: Wed, 6 Dec 2023 17:34:30 +0530 Subject: [PATCH 02/12] requirements updated --- requirements.txt | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8959bc00..a69436c1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,11 @@ +aiohttp==3.9.1 +aiosignal==1.3.1 +anyio==3.7.1 asgiref==3.6.0 astroid==2.13.2 +async-timeout==4.0.3 asyncio==3.4.3 attrs==21.4.0 -backports.zoneinfo==0.2.1 CacheControl==0.12.11 cachy==0.3.0 certifi==2022.5.18.1 @@ -11,13 +14,16 @@ charset-normalizer==2.0.12 cleo==0.8.1 click==8.1.3 clikit==0.6.2 +configobj==5.0.8 coreapi==2.3.3 coreschema==0.0.4 coverage==7.0.3 crashtest==0.3.1 cryptography==37.0.2 cssutils==2.6.0 +dataclasses-json==0.6.3 dateutils==0.6.12 +deprecation==2.1.0 dill==0.3.6 distlib==0.3.4 Django==4.1.5 @@ -31,6 +37,7 @@ django-rest-framework-braces==0.3.4 django_debug_toolbar==3.8.1 djangorestframework==3.13.1 djangorestframework-simplejwt==5.2.2 +docker==6.1.3 drf-generators==0.5.0 drf-spectacular==0.22.1 drf-spectacular-sidecar==2022.7.1 @@ -41,6 +48,8 @@ factory-boy==3.2.1 Faker==16.6.0 filelock==3.9.0 filetype==1.1.0 +frozenlist==1.4.0 +future==0.18.3 html5lib==1.1 idna==3.3 importlib-metadata==6.0.0 @@ -52,25 +61,34 @@ itypes==1.2.0 jaraco.classes==3.2.3 jeepney==0.8.0 Jinja2==3.1.2 +jsonpatch==1.33 +jsonpointer==2.4 jsonschema==4.17.3 keyring==23.13.1 +langchain==0.0.346 +langchain-core==0.0.10 +langsmith==0.0.69 lazy-object-proxy==1.7.1 lockfile==0.12.2 MarkupSafe==2.1.1 +marshmallow==3.20.1 mccabe==0.7.0 model-bakery==1.9.0 more-itertools==9.0.0 msgpack==1.0.4 +multidict==6.0.4 mypy-extensions==0.4.3 mysql-connector-python==8.0.28 nose==1.3.7 numpy==1.24.1 +openai==0.28.1 openpyxl==3.0.10 packaging==20.9 pandas==1.5.2 pastel==0.2.1 pathspec==0.9.0 pexpect==4.8.0 +pgvector==0.2.4 phonenumbers==8.13.15 Pillow==9.4.0 pkginfo==1.8.2 @@ -81,10 +99,13 @@ pluggy==1.0.0 poetry==1.1.13 poetry-core==1.0.8 protobuf==3.20.1 +psycopg==3.1.14 psycopg2-binary==2.9.3 ptyprocess==0.7.0 py==1.11.0 pyaml==21.10.1 +pyasn1==0.5.1 +pyasn1-modules==0.3.0 pycparser==2.21 pydantic==1.9.2 PyJWT==2.4.0 @@ -97,7 +118,10 @@ pytest==7.2.0 pytest-django==4.5.2 pytest-factoryboy==2.5.1 python-dateutil==2.8.2 +python-dotenv==1.0.0 +python-gnupg==0.5.1 python-http-client==3.3.7 +python-ldap==3.4.4 python-magic==0.4.27 python-on-whales==0.55.0 pytz==2022.1 @@ -106,33 +130,32 @@ requests==2.27.1 requests-toolbelt==0.9.1 ruamel.yaml==0.17.21 ruamel.yaml.clib==0.2.6 +secrets==1.0.2 SecretStorage==3.3.2 sendgrid==6.9.7 shellingham==1.4.0 six==1.16.0 +sniffio==1.3.0 +SQLAlchemy==2.0.23 sqlparse==0.4.2 starkbank-ecdsa==2.0.3 +systematic==4.8.7 +tenacity==8.2.3 +testcontainers==3.7.1 tomli==2.0.1 tomlkit==0.11.0 tqdm==4.64.0 typer==0.6.1 +typing-inspect==0.9.0 typing_extensions==4.4.0 uritemplate==4.1.1 urllib3==1.26.9 virtualenv==20.14.1 webencodings==0.5.1 +websocket-client==1.7.0 wrapt==1.14.1 xlrd==2.0.1 xlwt==1.3.0 xmltodict==0.13.0 +yarl==1.9.3 zipp==3.11.0 -ruamel.yaml -testcontainers -sqlalchemy -secrets==1.0.2 -langchain -openai -python-dotenv -pgvector -psycopg -tiktokens \ No newline at end of file From 2396068f588b7d1d6e346567bbd1aa23ba4949b4 Mon Sep 17 00:00:00 2001 From: piyushDG Date: Wed, 6 Dec 2023 17:44:20 +0530 Subject: [PATCH 03/12] Dockerfile updated --- Dockerfile | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2eed37db..e90b8250 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,33 +1,27 @@ FROM python:3.8-slim -RUN set -ex \ - && apt-get -y update && apt-get -y upgrade \ - && apt install python3-pip -y \ - && apt install curl -y \ +# Install dependencies +RUN apt-get update && apt-get install -y libsasl2-dev curl gcc libldap2-dev \ && DOCKER_CONFIG=${DOCKER_CONFIG:-$HOME/.docker} \ && mkdir -p $DOCKER_CONFIG/cli-plugins \ && curl -SL https://github.com/docker/compose/releases/download/v2.2.3/docker-compose-linux-x86_64 -o $DOCKER_CONFIG/cli-plugins/docker-compose \ && chmod +x $DOCKER_CONFIG/cli-plugins/docker-compose - -ADD . /datahub +# Set the working directory and copy the application files WORKDIR /datahub +COPY . /datahub -# RUN poetry init -RUN python3 -m pip install --upgrade pip -RUN pip install -r requirements.txt - -# RUN python manage.py makemigrations \ -# && python manage.py migrate \ -# && python manage.py loaddata db_scripts/userrole_fixture.yaml \ -# && python manage.py loaddata db_scripts/initial_data.yaml +# Upgrade pip and install required Python packages +RUN python -m pip install --upgrade pip \ + && pip install python-ldap==3.3.1 \ + && pip install --upgrade pyopenssl \ + && pip install -r requirements.txt -ENV PYTHONUNBUFFERED 1 -# ENV VIRTUAL_ENV /env - -# ENV PATH /env/bin:$PATH +# Set environment variables +ENV PYTHONUNBUFFERED 1 +# Expose port 8000 for the Django app EXPOSE 8000 - +# Command to run the Django development server CMD ["python", "manage.py", "runserver", "0.0.0.0:8000"] From deda9ca33b4ff11e70d5c756b77cd2122ec47763 Mon Sep 17 00:00:00 2001 From: ugeshdg Date: Wed, 6 Dec 2023 20:11:31 +0530 Subject: [PATCH 04/12] resource immutable added --- datahub/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datahub/views.py b/datahub/views.py index d9e22b3b..c4484e24 100644 --- a/datahub/views.py +++ b/datahub/views.py @@ -2813,7 +2813,7 @@ class ResourceManagementViewSet(GenericViewSet): def create(self, request, *args, **kwargs): try: user_map = request.META.get("map_id") - #request.data._mutable = True + request.data._mutable = True request.data["user_map"] = user_map serializer = self.get_serializer(data=request.data) From 73581419aaff19e6140c91098af7f7356b5d50a6 Mon Sep 17 00:00:00 2001 From: ugeshdg <96171777+ugeshdg@users.noreply.github.com> Date: Wed, 6 Dec 2023 21:00:06 +0530 Subject: [PATCH 05/12] Update serializers.py --- datahub/serializers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datahub/serializers.py b/datahub/serializers.py index dff3eaf0..285ec6f5 100644 --- a/datahub/serializers.py +++ b/datahub/serializers.py @@ -997,8 +997,11 @@ def get_content_files_count(self, resource): def create(self, validated_data): resource_files_data = validated_data.pop("uploaded_files") resource = Resource.objects.create(**validated_data) + if resource_files_data: + resource_files_data = json.loads(resource_files_data[0]) for file_data in resource_files_data: # file_size = file_data.size + import pdb; pdb.set_trace() ResourceFile.objects.create(resource=resource, **file_data) # url=file_data.get("url", ""), # type=file_data.get("type", ""), From aa4dc26c01469b578295626fa0c6b8f0e48433fc Mon Sep 17 00:00:00 2001 From: ugeshdg <96171777+ugeshdg@users.noreply.github.com> Date: Wed, 6 Dec 2023 21:01:57 +0530 Subject: [PATCH 06/12] Update serializers.py --- datahub/serializers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datahub/serializers.py b/datahub/serializers.py index 285ec6f5..b3446a2f 100644 --- a/datahub/serializers.py +++ b/datahub/serializers.py @@ -1001,7 +1001,6 @@ def create(self, validated_data): resource_files_data = json.loads(resource_files_data[0]) for file_data in resource_files_data: # file_size = file_data.size - import pdb; pdb.set_trace() ResourceFile.objects.create(resource=resource, **file_data) # url=file_data.get("url", ""), # type=file_data.get("type", ""), From f3ef8305cf1f9b2f369eea861635d08823a01d11 Mon Sep 17 00:00:00 2001 From: ugeshdg <96171777+ugeshdg@users.noreply.github.com> Date: Thu, 7 Dec 2023 12:09:53 +0530 Subject: [PATCH 07/12] Update views.py --- datahub/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datahub/views.py b/datahub/views.py index c4484e24..7f9d8e28 100644 --- a/datahub/views.py +++ b/datahub/views.py @@ -2911,7 +2911,7 @@ class ResourceFileManagementViewSet(GenericViewSet): @http_request_mutation def create(self, request, *args, **kwargs): try: - #request.data._mutable = True + request.data._mutable = True #request.data["file_size"] = request.FILES.get("file").size serializer = self.get_serializer(data=request.data, partial=True) serializer.is_valid(raise_exception=True) From a9faacda71a8485ceb38ab311044cdd584c61d6a Mon Sep 17 00:00:00 2001 From: ugeshdg Date: Thu, 7 Dec 2023 13:48:49 +0530 Subject: [PATCH 08/12] migrations updated --- ..._more.py => 0041_resource_resourcefile.py} | 34 +++------------- datahub/models.py | 39 +++++++++---------- 2 files changed, 24 insertions(+), 49 deletions(-) rename datahub/migrations/{0041_langchainpgcollection_langchainpgembedding_resource_and_more.py => 0041_resource_resourcefile.py} (72%) diff --git a/datahub/migrations/0041_langchainpgcollection_langchainpgembedding_resource_and_more.py b/datahub/migrations/0041_resource_resourcefile.py similarity index 72% rename from datahub/migrations/0041_langchainpgcollection_langchainpgembedding_resource_and_more.py rename to datahub/migrations/0041_resource_resourcefile.py index 86867db3..1beaecf1 100644 --- a/datahub/migrations/0041_langchainpgcollection_langchainpgembedding_resource_and_more.py +++ b/datahub/migrations/0041_resource_resourcefile.py @@ -1,8 +1,7 @@ -# Generated by Django 4.1.5 on 2023-12-06 07:46 +# Generated by Django 4.1.5 on 2023-12-07 08:17 from django.db import migrations, models import django.db.models.deletion -import pgvector.django import uuid @@ -13,31 +12,6 @@ class Migration(migrations.Migration): ] operations = [ - migrations.CreateModel( - name="LangchainPgCollection", - fields=[ - ("name", models.UUIDField()), - ("cmetadata", models.JSONField()), - ("uuid", models.UUIDField(primary_key=True, serialize=False)), - ], - options={ - "db_table": "langchain_pg_collection", - }, - ), - migrations.CreateModel( - name="LangchainPgEmbedding", - fields=[ - ("collection_id", models.UUIDField()), - ("embedding", pgvector.django.VectorField(verbose_name=1563)), - ("document", models.TextField()), - ("cmetadata", models.JSONField()), - ("custom_id", models.CharField(max_length=255)), - ("uuid", models.UUIDField(primary_key=True, serialize=False)), - ], - options={ - "db_table": "langchain_pg_embedding", - }, - ), migrations.CreateModel( name="Resource", fields=[ @@ -91,7 +65,11 @@ class Migration(migrations.Migration): ( "type", models.CharField( - choices=[("youtube", "youtube"), ("pdf", "pdf")], + choices=[ + ("youtube", "youtube"), + ("pdf", "pdf"), + ("file", "file"), + ], max_length=20, null=True, ), diff --git a/datahub/models.py b/datahub/models.py index c4636a1a..d82bca29 100644 --- a/datahub/models.py +++ b/datahub/models.py @@ -322,29 +322,26 @@ def __str__(self) -> str: # class ResourceVector(TimeStampMixin): # resource_file = models.ForeignKey(ResourceFile, on_delete=models.CASCADE, related_name="resource_file") -class LangchainPgCollection(models.Model): - name = models.UUIDField() - cmetadata = models.JSONField() - uuid = models.UUIDField(primary_key=True) +# class LangchainPgCollection(models.Model): +# name = models.UUIDField() +# cmetadata = models.JSONField() +# uuid = models.UUIDField(primary_key=True) - class Meta: - db_table = 'langchain_pg_collection' - - -class LangchainPgEmbedding(models.Model): - # resource_file = models.ForeignKey(ResourceFile, on_delete=models.CASCADE) - collection_id = models.UUIDField() - embedding = VectorField(1563) # Assuming 'vector' is a custom PostgreSQL data type - document = models.TextField() - cmetadata = models.JSONField() - custom_id = models.CharField(max_length=255) - uuid = models.UUIDField(primary_key=True) +# class Meta: +# db_table = 'langchain_pg_collection' - class Meta: - db_table = 'langchain_pg_embedding' - - def __str__(self): - return f"LangchainPgEmbedding(uuid={self.uuid}, document={self.document})" +# class LangchainPgEmbedding(models.Model): +# # resource_file = models.ForeignKey(ResourceFile, on_delete=models.CASCADE) +# collection_id = models.UUIDField() +# embedding = VectorField(1563) # Assuming 'vector' is a custom PostgreSQL data type +# document = models.TextField() +# cmetadata = models.JSONField() +# custom_id = models.CharField(max_length=255) +# uuid = models.UUIDField(primary_key=True) +# class Meta: +# db_table = 'langchain_pg_embedding' +# def __str__(self): +# return f"LangchainPgEmbedding(uuid={self.uuid}, document={self.document})" From fea8293f661445bd2c5a24a56906ae83f35bdf3b Mon Sep 17 00:00:00 2001 From: ugeshdg Date: Thu, 7 Dec 2023 14:07:23 +0530 Subject: [PATCH 09/12] serializer updated --- datahub/serializers.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/datahub/serializers.py b/datahub/serializers.py index b531f9ae..92665cda 100644 --- a/datahub/serializers.py +++ b/datahub/serializers.py @@ -8,6 +8,7 @@ from django.conf import settings from django.core.exceptions import ObjectDoesNotExist, ValidationError from django.core.validators import URLValidator +from django.db.models import Count, Q from django.utils.translation import gettext as _ from rest_framework import serializers, status @@ -41,8 +42,11 @@ ) from .models import Policy, Resource, ResourceFile, UsagePolicy +<<<<<<< HEAD from django.core.validators import URLValidator from django.core.exceptions import ValidationError +======= +>>>>>>> 67a32ee (serializers updated) LOGGER = logging.getLogger(__name__) From 9356643ce6bebe1741d138858357c727e8e99b86 Mon Sep 17 00:00:00 2001 From: ugeshdg Date: Thu, 7 Dec 2023 15:09:11 +0530 Subject: [PATCH 10/12] serializer updated --- datahub/serializers.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/datahub/serializers.py b/datahub/serializers.py index 92665cda..12e4f485 100644 --- a/datahub/serializers.py +++ b/datahub/serializers.py @@ -42,11 +42,6 @@ ) from .models import Policy, Resource, ResourceFile, UsagePolicy -<<<<<<< HEAD -from django.core.validators import URLValidator -from django.core.exceptions import ValidationError -======= ->>>>>>> 67a32ee (serializers updated) LOGGER = logging.getLogger(__name__) From 55ce894f514a3faf32e1afcf0a4722b374975579 Mon Sep 17 00:00:00 2001 From: ugeshdg Date: Thu, 7 Dec 2023 15:14:50 +0530 Subject: [PATCH 11/12] migrations updated --- .../migrations/0042_merge_20231207_0944.py | 16 +++++++++++ ...3_delete_langchainpgcollection_and_more.py | 28 +++++++++++++++++++ datahub/models.py | 3 +- 3 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 datahub/migrations/0042_merge_20231207_0944.py create mode 100644 datahub/migrations/0043_delete_langchainpgcollection_and_more.py diff --git a/datahub/migrations/0042_merge_20231207_0944.py b/datahub/migrations/0042_merge_20231207_0944.py new file mode 100644 index 00000000..263ad5e6 --- /dev/null +++ b/datahub/migrations/0042_merge_20231207_0944.py @@ -0,0 +1,16 @@ +# Generated by Django 4.1.5 on 2023-12-07 09:44 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ( + "datahub", + "0041_langchainpgcollection_langchainpgembedding_resource_and_more", + ), + ("datahub", "0041_resource_resourcefile"), + ] + + operations = [] diff --git a/datahub/migrations/0043_delete_langchainpgcollection_and_more.py b/datahub/migrations/0043_delete_langchainpgcollection_and_more.py new file mode 100644 index 00000000..6bbf8bf3 --- /dev/null +++ b/datahub/migrations/0043_delete_langchainpgcollection_and_more.py @@ -0,0 +1,28 @@ +# Generated by Django 4.1.5 on 2023-12-07 09:44 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("datahub", "0042_merge_20231207_0944"), + ] + + operations = [ + migrations.DeleteModel( + name="LangchainPgCollection", + ), + migrations.DeleteModel( + name="LangchainPgEmbedding", + ), + migrations.AlterField( + model_name="resourcefile", + name="type", + field=models.CharField( + choices=[("youtube", "youtube"), ("pdf", "pdf"), ("file", "file")], + max_length=20, + null=True, + ), + ), + ] diff --git a/datahub/models.py b/datahub/models.py index 37435df3..b0e0db1e 100644 --- a/datahub/models.py +++ b/datahub/models.py @@ -317,7 +317,7 @@ class ResourceFile(TimeStampMixin): def __str__(self) -> str: return self.file.name -from pgvector.django import VectorField +# from pgvector.django import VectorField # class ResourceVector(TimeStampMixin): # resource_file = models.ForeignKey(ResourceFile, on_delete=models.CASCADE, related_name="resource_file") @@ -345,4 +345,3 @@ def __str__(self) -> str: # def __str__(self): # return f"LangchainPgEmbedding(uuid={self.uuid}, document={self.document})" - From 7bcb3a4b21b6b051a029f8c867b705e0f85bef98 Mon Sep 17 00:00:00 2001 From: ugeshdg Date: Wed, 20 Dec 2023 00:43:15 +0530 Subject: [PATCH 12/12] title and desc updated --- ...source_description_alter_resource_title.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 datahub/migrations/0044_alter_resource_description_alter_resource_title.py diff --git a/datahub/migrations/0044_alter_resource_description_alter_resource_title.py b/datahub/migrations/0044_alter_resource_description_alter_resource_title.py new file mode 100644 index 00000000..de4b1eb2 --- /dev/null +++ b/datahub/migrations/0044_alter_resource_description_alter_resource_title.py @@ -0,0 +1,23 @@ +# Generated by Django 4.1.5 on 2023-12-19 18:56 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("datahub", "0043_delete_langchainpgcollection_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="resource", + name="description", + field=models.TextField(max_length=500), + ), + migrations.AlterField( + model_name="resource", + name="title", + field=models.CharField(max_length=200), + ), + ]