make dataset parsing more robust
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
j3d1 2024-03-11 17:37:56 +01:00
parent f2db5d9dad
commit 8cf0897ec5
13 changed files with 337 additions and 36 deletions

View file

@ -1,6 +1,6 @@
from django.contrib import admin
from .models import Domain
from .models import Domain, ImportedIdentifierSets
class DomainAdmin(admin.ModelAdmin):
@ -9,3 +9,11 @@ class DomainAdmin(admin.ModelAdmin):
admin.site.register(Domain, DomainAdmin)
class ImportedIdentifierSetsAdmin(admin.ModelAdmin):
    """Admin options for ImportedIdentifierSets.

    The change list shows the name, hash and creation timestamp of every
    entry and offers the same three fields as filters.
    """

    # One immutable tuple is shared by both options; they list the same fields.
    list_display = list_filter = ('name', 'hash', 'created_at')


admin.site.register(ImportedIdentifierSets, ImportedIdentifierSetsAdmin)

View file

@ -0,0 +1,39 @@
# Generated by Django 4.2.2 on 2024-03-11 15:19
import os
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add a unique ``hash`` column to ImportedIdentifierSets and backfill it.

    The column is first added as nullable, then filled from the matching
    file under ``shared_data/`` for every row that has no hash yet, and
    finally altered to be non-null and unique.
    """

    dependencies = [
        ('hostadmin', '0002_importedidentifiersets'),
    ]

    def calculate_hash(apps, schema_editor):
        """Fill ``hash`` with the SHA-256 hex digest of each set's JSON file.

        Rows that already carry a hash are left untouched. Rows whose file
        ``shared_data/<name without 'git:' prefix>.json`` does not exist are
        skipped.
        """
        from hashlib import sha256

        # Use the historical model state instead of importing the concrete
        # model, so the migration keeps working when the model changes later.
        ImportedIdentifierSets = apps.get_model('hostadmin', 'ImportedIdentifierSets')
        prefix = 'git:'
        for identifier_set in ImportedIdentifierSets.objects.all():
            if identifier_set.hash:
                continue
            print("update", identifier_set.name)
            name = identifier_set.name
            # str.strip('git:') would remove any of the characters
            # 'g', 'i', 't', ':' from BOTH ends (e.g. 'git:tools' -> 'ools');
            # only the literal leading prefix must be dropped.
            if name.startswith(prefix):
                name = name[len(prefix):]
            filename = "shared_data/" + name + ".json"
            if not os.path.exists(filename):
                # NOTE(review): such rows keep a NULL hash, and the following
                # AlterField to a non-null unique column will then fail on
                # them -- confirm whether they should be deleted or given a
                # placeholder value.
                continue
            # Decode explicitly as UTF-8 so the digest does not depend on the
            # platform's default encoding (the text is re-encoded as UTF-8).
            with open(filename, 'r', encoding='utf-8') as file:
                data = file.read()
            identifier_set.hash = sha256(data.encode()).hexdigest()
            identifier_set.save()

    operations = [
        migrations.AddField(
            model_name='importedidentifiersets',
            name='hash',
            field=models.CharField(blank=True, max_length=255, null=True),
        ),
        # Reversing needs no data work: undoing AddField drops the column.
        migrations.RunPython(calculate_hash, migrations.RunPython.noop),
        migrations.AlterField(
            model_name='importedidentifiersets',
            name='hash',
            field=models.CharField(max_length=255, unique=True),
        ),
    ]

View file

@ -0,0 +1,17 @@
# Generated by Django 4.2.2 on 2024-03-14 16:33
from django.db import migrations
class Migration(migrations.Migration):
    """Set the plural verbose name of the ImportedIdentifierSets model."""

    dependencies = [('hostadmin', '0003_importedidentifiersets_hash')]

    operations = [
        # Only model options change here.
        migrations.AlterModelOptions(
            name='importedidentifiersets',
            options=dict(verbose_name_plural='imported identifier sets'),
        ),
    ]

View file

@ -12,4 +12,8 @@ class Domain(models.Model):
class ImportedIdentifierSets(models.Model):
    """Record of an imported identifier set, keyed by name and content hash."""

    # Both name and hash must be unique across all recorded sets.
    name = models.CharField(unique=True, max_length=255)
    hash = models.CharField(unique=True, max_length=255)
    # Filled in automatically when the row is first created.
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        # The class name is already plural; give the admin a readable label.
        verbose_name_plural = 'imported identifier sets'

View file

@ -5,6 +5,32 @@ from hostadmin.models import Domain
from toolshed.models import Category, Property, Tag
class SlugPathField(serializers.SlugRelatedField):
    """Slug-related field that understands '/'-separated paths.

    Input such as ``'cat1/subcat1'`` is resolved by its last segment; the
    leading segments are only consulted when that name alone is ambiguous.
    """

    def to_internal_value(self, data):
        """Resolve ``data`` (a name or a '/'-separated path) to one object.

        Raises:
            serializers.ValidationError: if ``data`` is not a string, if no
                object matches, or if the match stays ambiguous.
        """
        if not isinstance(data, str):
            # Previously a non-string value crashed with TypeError on the
            # "'/' in data" test instead of producing a validation error.
            raise serializers.ValidationError("Invalid value.")

        # str.split already yields a one-element list when no '/' is present.
        path = data.split('/')
        name = path[-1]

        # Materialise once so the count check and the result share one query.
        candidates = list(self.get_queryset().filter(name=name))
        if len(candidates) == 1:
            # NOTE(review): a unique name wins even if the leading path
            # segments do not match its real ancestors (e.g. 'bogus/cat3');
            # confirm whether the prefix should be verified here as well.
            return candidates[0]
        if not candidates:
            raise serializers.ValidationError(
                "No {} with name '{}' found".format(self.queryset.model.__name__, name))
        if len(path) == 1:
            raise serializers.ValidationError("Multiple {}s with name '{}' found, please specify the parent".format(
                self.queryset.model.__name__, name))

        # Several objects share the name: narrow them down by the parent,
        # which is resolved recursively from the remaining path.
        parent = self.to_internal_value('/'.join(path[:-1]))
        candidates = list(self.get_queryset().filter(name=name, parent=parent))
        if len(candidates) == 1:
            return candidates[0]
        if not candidates:
            raise serializers.ValidationError(
                "No {} with name '{}' found".format(self.queryset.model.__name__, name))
        # Previously this case fell off the end and silently returned None.
        raise serializers.ValidationError("Multiple {}s with name '{}' found below the given parent".format(
            self.queryset.model.__name__, name))

    def to_representation(self, value):
        """Return the slug of ``value``, prefixed by its ancestors' path.

        The ancestor is read from the attribute of ``value`` that is named
        like this field; when that attribute is missing or falsy, only the
        slug itself is returned.
        """
        # NOTE(review): for a field named 'parent' this walks up the parent
        # chain; for a field named e.g. 'category' the related object
        # presumably has no such attribute, so only the leaf name is emitted.
        source = getattr(value, self.field_name, None)  # should this use self.source?
        prefix = (self.to_representation(source) + '/') if source else ''
        return prefix + getattr(value, self.slug_field)
class DomainSerializer(serializers.ModelSerializer):
owner = OwnerSerializer(read_only=True)
@ -12,12 +38,21 @@ class DomainSerializer(serializers.ModelSerializer):
model = Domain
fields = ['name', 'owner', 'open_registration']
def create(self, validated_data):
    """Create the instance by delegating unchanged to the parent serializer."""
    instance = super().create(validated_data)
    return instance
class CategorySerializer(serializers.ModelSerializer):
parent = serializers.SlugRelatedField(slug_field='name', queryset=Category.objects.all(), required=False)
parent = SlugPathField(slug_field='name', queryset=Category.objects.all(), required=False)
def validate(self, attrs):
    """Reject a category name that contains '/'; return ``attrs`` otherwise.

    A missing 'name' key is accepted as-is.
    """
    # '/' is what SlugPathField splits category paths on.
    name_has_separator = 'name' in attrs and '/' in attrs['name']
    if name_has_separator:
        raise serializers.ValidationError("Category name cannot contain '/'")
    return attrs
def create(self, validated_data):
    """Create a Category; any failure is reported as a validation error."""
    try:
        category = Category.objects.create(**validated_data)
    # NOTE(review): every exception, not only a uniqueness conflict, is
    # turned into a ValidationError here -- confirm that this is intended.
    except Exception as exc:
        raise serializers.ValidationError(exc)
    return category
class Meta:
model = Category
@ -27,7 +62,19 @@ class CategorySerializer(serializers.ModelSerializer):
class PropertySerializer(serializers.ModelSerializer):
category = serializers.SlugRelatedField(slug_field='name', queryset=Category.objects.all(), required=False)
category = SlugPathField(slug_field='name', queryset=Category.objects.all(), required=False)
def validate(self, attrs):
    """Reject a property name that contains '/'; return ``attrs`` otherwise.

    A missing 'name' key is accepted as-is.
    """
    name_has_separator = 'name' in attrs and '/' in attrs['name']
    if name_has_separator:
        raise serializers.ValidationError("Property name cannot contain '/'")
    return attrs
def create(self, validated_data):
    """Create a Property; any failure is reported as a validation error."""
    try:
        prop = Property.objects.create(**validated_data)
    # NOTE(review): every exception, not only a uniqueness conflict, is
    # turned into a ValidationError here -- confirm that this is intended.
    except Exception as exc:
        raise serializers.ValidationError(exc)
    return prop
class Meta:
model = Property
@ -38,7 +85,19 @@ class PropertySerializer(serializers.ModelSerializer):
class TagSerializer(serializers.ModelSerializer):
category = serializers.SlugRelatedField(slug_field='name', queryset=Category.objects.all(), required=False)
category = SlugPathField(slug_field='name', queryset=Category.objects.all(), required=False)
def validate(self, attrs):
    """Reject a tag name that contains '/'; return ``attrs`` otherwise.

    A missing 'name' key is accepted as-is.
    """
    name_has_separator = 'name' in attrs and '/' in attrs['name']
    if name_has_separator:
        raise serializers.ValidationError("Tag name cannot contain '/'")
    return attrs
def create(self, validated_data):
    """Create a Tag; any failure is reported as a validation error."""
    try:
        tag = Tag.objects.create(**validated_data)
    # NOTE(review): every exception, not only a uniqueness conflict, is
    # turned into a ValidationError here -- confirm that this is intended.
    except Exception as exc:
        raise serializers.ValidationError(exc)
    return tag
class Meta:
model = Tag

View file

@ -100,7 +100,8 @@ class CategoryApiTestCase(UserTestMixin, CategoryTestMixin, ToolshedTestCase):
response = client.get('/api/categories/', self.f['local_user1'])
self.assertEqual(response.status_code, 200)
self.assertEqual(response.json(),
["cat1", "cat2", "cat3", "cat1/subcat1", "cat1/subcat2", "cat1/subcat1/subcat3"])
["cat1", "cat2", "cat3", "cat1/subcat1",
"cat1/subcat2", "cat1/subcat1/subcat1", "cat1/subcat1/subcat2"])
def test_admin_get_categories_fail(self):
response = client.get('/admin/categories/', self.f['local_user1'])
@ -109,7 +110,7 @@ class CategoryApiTestCase(UserTestMixin, CategoryTestMixin, ToolshedTestCase):
def test_admin_get_categories(self):
response = client.get('/admin/categories/', self.f['admin'])
self.assertEqual(response.status_code, 200)
self.assertEqual(len(response.json()), 6)
self.assertEqual(len(response.json()), 7)
self.assertEqual(response.json()[0]['name'], 'cat1')
self.assertEqual(response.json()[1]['name'], 'cat2')
self.assertEqual(response.json()[2]['name'], 'cat3')
@ -117,10 +118,12 @@ class CategoryApiTestCase(UserTestMixin, CategoryTestMixin, ToolshedTestCase):
self.assertEqual(response.json()[3]['parent'], 'cat1')
self.assertEqual(response.json()[4]['name'], 'subcat2')
self.assertEqual(response.json()[4]['parent'], 'cat1')
self.assertEqual(response.json()[5]['name'], 'subcat3')
self.assertEqual(response.json()[5]['parent'], 'subcat1')
self.assertEqual(response.json()[5]['name'], 'subcat1')
self.assertEqual(response.json()[5]['parent'], 'cat1/subcat1')
self.assertEqual(response.json()[6]['name'], 'subcat2')
self.assertEqual(response.json()[6]['parent'], 'cat1/subcat1')
def test_admin_create_category(self):
def test_admin_post_category(self):
response = client.post('/admin/categories/', self.f['admin'], {'name': 'cat4'})
self.assertEqual(response.status_code, 201)
self.assertEqual(response.json()['name'], 'cat4')
@ -128,6 +131,40 @@ class CategoryApiTestCase(UserTestMixin, CategoryTestMixin, ToolshedTestCase):
self.assertEqual(response.json()['parent'], None)
self.assertEqual(response.json()['origin'], 'api')
def test_admin_post_category_duplicate(self):
    # Posting the name 'cat3' again is expected to be rejected.
    payload = {'name': 'cat3'}
    reply = client.post('/admin/categories/', self.f['admin'], payload)
    self.assertEqual(reply.status_code, 400)
def test_admin_post_category_invalid(self):
    # A '/' inside the category name itself must be refused.
    payload = {'name': 'cat/4'}
    reply = client.post('/admin/categories/', self.f['admin'], payload)
    self.assertEqual(reply.status_code, 400)
def test_admin_post_category_parent_not_found(self):
    # The parent 'cat4' is expected not to resolve, so the request fails.
    payload = {'name': 'subcat4', 'parent': 'cat4'}
    reply = client.post('/admin/categories/', self.f['admin'], payload)
    self.assertEqual(reply.status_code, 400)
def test_admin_post_category_parent_ambiguous(self):
    # The bare parent name 'subcat1' is expected to be ambiguous.
    payload = {'name': 'subcat4', 'parent': 'subcat1'}
    reply = client.post('/admin/categories/', self.f['admin'], payload)
    self.assertEqual(reply.status_code, 400)
def test_admin_post_category_parent_subcategory(self):
    # A nested parent is given as a path and echoed back as that same path.
    payload = {'name': 'subcat4', 'parent': 'cat1/subcat1'}
    reply = client.post('/admin/categories/', self.f['admin'], payload)
    self.assertEqual(reply.status_code, 201)
    created = reply.json()
    self.assertEqual(created['name'], 'subcat4')
    self.assertEqual(created['description'], None)
    self.assertEqual(created['parent'], 'cat1/subcat1')
    self.assertEqual(created['origin'], 'api')
def test_admin_post_category_parent_subcategory_not_found(self):
    # The path 'cat2/subcat1' is expected not to resolve to any category.
    payload = {'name': 'subcat4', 'parent': 'cat2/subcat1'}
    reply = client.post('/admin/categories/', self.f['admin'], payload)
    self.assertEqual(reply.status_code, 400)
def test_admin_post_category_parent_subcategory_ambiguous(self):
    from toolshed.models import Category

    # Add one more 'subcat1' level below the 'subcat11' fixture so that the
    # path 'subcat1/subcat1' can no longer be resolved unambiguously.
    extra_level = Category.objects.create(name='subcat1', parent=self.f['subcat11'], origin='test')
    self.f['subcat111'] = extra_level
    payload = {'name': 'subcat4', 'parent': 'subcat1/subcat1'}
    reply = client.post('/admin/categories/', self.f['admin'], payload)
    self.assertEqual(reply.status_code, 400)
def test_admin_post_subcategory(self):
response = client.post('/admin/categories/', self.f['admin'], {'name': 'subcat4', 'parent': 'cat1'})
self.assertEqual(response.status_code, 201)
@ -136,6 +173,18 @@ class CategoryApiTestCase(UserTestMixin, CategoryTestMixin, ToolshedTestCase):
self.assertEqual(response.json()['parent'], 'cat1')
self.assertEqual(response.json()['origin'], 'api')
def test_admin_post_subcategory_duplicate(self):
    # Posting 'subcat2' under 'cat1' again is expected to be rejected.
    payload = {'name': 'subcat2', 'parent': 'cat1'}
    reply = client.post('/admin/categories/', self.f['admin'], payload)
    self.assertEqual(reply.status_code, 400)
def test_admin_post_subcategory_distinct_duplicate(self):
    # The name 'subcat2' under the parent 'cat2' is expected to be accepted.
    payload = {'name': 'subcat2', 'parent': 'cat2'}
    reply = client.post('/admin/categories/', self.f['admin'], payload)
    self.assertEqual(reply.status_code, 201)
    created = reply.json()
    self.assertEqual(created['name'], 'subcat2')
    self.assertEqual(created['description'], None)
    self.assertEqual(created['parent'], 'cat2')
    self.assertEqual(created['origin'], 'api')
def test_admin_put_category(self):
response = client.put('/admin/categories/1/', self.f['admin'], {'name': 'cat5'})
self.assertEqual(response.status_code, 200)
@ -188,6 +237,14 @@ class TagApiTestCase(UserTestMixin, CategoryTestMixin, TagTestMixin, ToolshedTes
self.assertEqual(response.json()['origin'], 'api')
self.assertEqual(response.json()['category'], None)
def test_admin_create_tag_duplicate(self):
    # Posting the tag name 'tag3' again is expected to be rejected.
    payload = {'name': 'tag3'}
    reply = client.post('/admin/tags/', self.f['admin'], payload)
    self.assertEqual(reply.status_code, 400)
def test_admin_create_tag_invalid(self):
    # A '/' inside the tag name must be refused.
    payload = {'name': 'tag/4'}
    reply = client.post('/admin/tags/', self.f['admin'], payload)
    self.assertEqual(reply.status_code, 400)
def test_admin_put_tag(self):
response = client.put('/admin/tags/1/', self.f['admin'], {'name': 'tag5'})
self.assertEqual(response.status_code, 200)
@ -250,7 +307,13 @@ class PropertyApiTestCase(UserTestMixin, CategoryTestMixin, PropertyTestMixin, T
self.assertEqual(response.json()['base2_prefix'], False)
self.assertEqual(response.json()['dimensions'], 1)
# self.assertEqual(response.json()['sort_lexicographically'], False)
def test_admin_create_property_duplicate(self):
    # Posting 'prop3' in category 'cat1' again is expected to be rejected.
    payload = {'name': 'prop3', 'category': 'cat1'}
    reply = client.post('/admin/properties/', self.f['admin'], payload)
    self.assertEqual(reply.status_code, 400)
def test_admin_create_property_invalid(self):
    # A '/' inside the property name must be refused.
    payload = {'name': 'prop/4'}
    reply = client.post('/admin/properties/', self.f['admin'], payload)
    self.assertEqual(reply.status_code, 400)
def test_admin_put_property(self):
response = client.put('/admin/properties/1/', self.f['admin'], {'name': 'prop5'})
@ -265,8 +328,6 @@ class PropertyApiTestCase(UserTestMixin, CategoryTestMixin, PropertyTestMixin, T
self.assertEqual(response.json()['base2_prefix'], False)
self.assertEqual(response.json()['dimensions'], 1)
# self.assertEqual(response.json()['sort_lexicographically'], False)
def test_admin_patch_property(self):
response = client.patch('/admin/properties/1/', self.f['admin'], {'name': 'prop5'})
self.assertEqual(response.status_code, 200)