s5_practice/scripts/parse.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Question id -> index of its figure; map_qa() turns the index into 'image-NNN.png'.\n",
    "# One row per contiguous run of question ids.\n",
    "imageRefs = {\n",
    "    213: 0, 214: 1, 215: 2,\n",
    "    216: 1,  # NOTE(review): breaks the run and index 3 is never used - confirm this is not a typo for 3\n",
    "    217: 4, 218: 5, 219: 6, 220: 7, 221: 8, 222: 9, 223: 10, 224: 11, 225: 12, 226: 13, 227: 14,\n",
    "    228: 15, 229: 16, 230: 17, 231: 18, 232: 19, 233: 20, 234: 21, 235: 22, 236: 23, 237: 24, 238: 25,\n",
    "    240: 26, 241: 27, 242: 28, 243: 29, 244: 30, 245: 31, 246: 32, 247: 33, 248: 34, 249: 35, 250: 36,\n",
    "    256: 37, 257: 38, 258: 39, 259: 40, 260: 41,\n",
    "    430: 77, 431: 78, 432: 79, 433: 80,\n",
    "    461: 81, 462: 82,\n",
    "    475: 83, 476: 84, 477: 85, 478: 86, 479: 87,\n",
    "    482: 42, 483: 43, 484: 44, 485: 45, 486: 46, 487: 47, 488: 48,\n",
    "    503: 49, 504: 50,\n",
    "    509: 51, 510: 52, 511: 53,\n",
    "    518: 54,\n",
    "    523: 55, 524: 56,\n",
    "    530: 57, 531: 58, 532: 59, 533: 60,\n",
    "    543: 61, 544: 62, 545: 63, 546: 64,\n",
    "    563: 65, 564: 66, 565: 67,\n",
    "    574: 68, 575: 69,\n",
    "    577: 70,\n",
    "    580: 71,\n",
    "    585: 72, 586: 73, 587: 74, 588: 75, 589: 76,\n",
    "    683: 88,\n",
    "    692: 89,\n",
    "    696: 90, 697: 91, 698: 92,\n",
    "    704: 93, 705: 94,\n",
    "    707: 95,\n",
    "    751: 96, 752: 97, 753: 98,\n",
    "    760: 99,\n",
    "    764: 100,\n",
    "    766: 101, 767: 102,\n",
    "    769: 103, 770: 104,\n",
    "    845: 105, 846: 106,\n",
    "    848: 107, 849: 108,\n",
    "    851: 109, 852: 110, 853: 111, 854: 112, 855: 113, 856: 114,\n",
    "    885: 115, 886: 116, 887: 117, 888: 118, 889: 119,\n",
    "    934: 120,\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split points: a line break directly followed by '<digits>.' starts a new question;\n",
    "# a line break, optional whitespace, then '<capital letter>.' starts a new answer.\n",
    "q_split = re.compile(r'\\n(?=\\d+\\.)')\n",
    "a_split = re.compile(r'\\n\\s*(?=[A-Z]\\.)')\n",
    "# A line break together with the whitespace that follows it.\n",
    "ql_split = re.compile(r'\\n\\s*')\n",
    "# Matches any single character outside the set accepted in a question chunk.\n",
    "invalid_chars = re.compile(r'[^\\w\\s\\d\\n\\.?!:;,čšžČŠŽ/\"\\-–\\(\\)\\[\\]»«…=*%+\\\\\\{\\}$∞<>]')\n",
    "\n",
    "# Input text files, read in this order. The paths are absolute and machine-specific.\n",
    "f = [\n",
    "    'C:/Users/Jakob/Documents/Ham/Izpitna vprašanja/A_01.txt',\n",
    "    'C:/Users/Jakob/Documents/Ham/Izpitna vprašanja/A_02.txt',\n",
    "    'C:/Users/Jakob/Documents/Ham/Izpitna vprašanja/A_03.txt',\n",
    "    'C:/Users/Jakob/Documents/Ham/Izpitna vprašanja/A_04.txt',\n",
    "]\n",
    "\n",
    "def map_a(s: str):\n",
    "    \"\"\"Turn the answer part of a question into a list of answer texts.\n",
    "\n",
    "    The text is cut at a_split matches and whitespace runs inside each piece\n",
    "    are collapsed to one space. Piece number i must start with the i-th capital\n",
    "    letter (A, B, C, ...); otherwise Exception(s) is raised. The first two\n",
    "    characters (the label) and trailing dots/spaces are removed from each piece.\n",
    "    \"\"\"\n",
    "    options = []\n",
    "    for position, chunk in enumerate(a_split.split(s)):\n",
    "        text = re.sub(r'\\s+', ' ', chunk.strip())\n",
    "        if text[0] != chr(ord('A') + position):\n",
    "            raise Exception(s)\n",
    "        options.append(text[2:].lstrip().rstrip('. '))\n",
    "    return options\n",
    "\n",
    "def map_q(s: str):\n",
    "    \"\"\"Separate the question text from the key letter that ends its first line.\n",
    "\n",
    "    Returns a tuple (text, index): text is the question with its lines joined\n",
    "    by single spaces and the key letter removed; index is the 0-based position\n",
    "    of that letter (A -> 0, B -> 1, ...). Raises Exception(s) when the first\n",
    "    line does not end in a character between 'A' and 'Z'.\n",
    "    \"\"\"\n",
    "    first_line, *other_lines = ql_split.split(s)\n",
    "    key = first_line[-1]\n",
    "    if not 'A' <= key <= 'Z':\n",
    "        raise Exception(s)\n",
    "    text = ' '.join([first_line[:-1].strip(), *other_lines])\n",
    "    return text, ord(key) - ord('A')\n",
    "\n",
    "def map_qa(s: str):\n",
    "    \"\"\"Parse one numbered chunk ('<id>. question ... answers') into a record.\n",
    "\n",
    "    The chunk is stripped and rejected with Exception(s) if it contains a\n",
    "    character matched by invalid_chars or has no '.' after the number. The\n",
    "    first run of two or more line breaks separates the question from the\n",
    "    answers; without one, the whole remainder is the question and both\n",
    "    'answers' and 'correct' are None.\n",
    "\n",
    "    Returns a dict with keys 'id', 'question', 'image' (file name built from\n",
    "    imageRefs, or None), 'answers' and 'correct'.\n",
    "    \"\"\"\n",
    "    s = s.strip()\n",
    "    if invalid_chars.search(s):\n",
    "        raise Exception(s)\n",
    "    dot = s.find('.')\n",
    "    if dot < 0:\n",
    "        # find() returned -1; slicing with it would silently drop the last character\n",
    "        raise Exception(s)\n",
    "    qid = int(s[:dot])\n",
    "    body = s[dot+1:].strip()\n",
    "    gap = re.search(r'\\n{2,}', body)\n",
    "    if gap is None:\n",
    "        question = ' '.join(ql_split.split(body))\n",
    "        answers = correct = None\n",
    "    else:\n",
    "        cut = gap.start()\n",
    "        question, correct = map_q(body[:cut].strip())\n",
    "        answers = map_a(body[cut+2:].strip())\n",
    "    img = imageRefs.get(qid)\n",
    "    if img is not None:\n",
    "        img = f'image-{img:03d}.png'\n",
    "    return {'id': qid, 'question': question, 'image': img, 'answers': answers, 'correct': correct}\n",
    "\n",
    "# Parse every input file and collect the question records in file order.\n",
    "q = []\n",
    "for ff in f:\n",
    "    with open(ff, encoding='utf-8') as file:\n",
    "        q.extend(map(map_qa, q_split.split(file.read())))\n",
    "\n",
    "# Ids must run 1, 2, 3, ... across all files; report the record that breaks the run.\n",
    "for i, qa in enumerate(q):\n",
    "    if qa['id'] != i + 1:\n",
    "        raise Exception(f'{qa[\"id\"]} != {i+1}')\n",
    "# A question that has answers must have exactly three of them.\n",
    "for qa in q:\n",
    "    if qa['answers'] is not None and len(qa['answers']) != 3:\n",
    "        raise Exception(qa)\n",
    "\n",
    "print(len(q), 'questions')\n",
    "with open('../assets/questions.json', 'w', encoding='utf-8') as file:\n",
    "    json.dump(q, file, ensure_ascii=False, indent=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add categories\n",
    "# Each entry lists inclusive (first_id, last_id) ranges of question ids.\n",
    "# There is no category with id 8.\n",
    "categories = [\n",
    "    {'id': 1, 'title': 'Zgodovina radioamaterstva, IARU in ZRS', 'questions': [(1, 12)]},\n",
    "    {'id': 2, 'title': 'Osnovni pojmi o radijskih komunikacijah', 'questions': [(13, 24)]},\n",
    "    # NOTE(review): 'code' in the next title may be a typo for 'cone' - confirm\n",
    "    {'id': 3, 'title': 'Razdelitev frekvenc, moči, regioni, code', 'questions': [(25, 128), (239, 239)]},\n",
    "    {'id': 4, 'title': 'Mednarodni in slovenski predpisi', 'questions': [(129, 149)]},\n",
    "    {'id': 5, 'title': 'Q-kod in kratice', 'questions': [(150, 168), (173, 201), (251, 255), (261, 268), (299, 299)]},\n",
    "    {'id': 6, 'title': 'Operaterstvo', 'questions': [(169, 172), (283, 298), (375, 385), (388, 394)]},\n",
    "    {'id': 7, 'title': 'Prefiksi in sufiksi', 'questions': [(202, 212)]},\n",
    "    {'id': 9, 'title': 'Risanje', 'questions': [(240, 250), (256, 260)]},\n",
    "    {'id': 10, 'title': 'RST sistem', 'questions': [(269, 282)]},\n",
    "    {'id': 11, 'title': 'Digitalne komunikacije', 'questions': [(300, 312)]},\n",
    "    {'id': 12, 'title': 'Radioamaterski dnevnik', 'questions': [(313, 325)]},\n",
    "    {'id': 13, 'title': 'QSL kartice', 'questions': [(326, 337)]},\n",
    "    {'id': 14, 'title': 'Časovne cone in UTC', 'questions': [(338, 344)]},\n",
    "    {'id': 15, 'title': 'Univerzalni lokator', 'questions': [(345, 350)]},\n",
    "    {'id': 16, 'title': 'Radioamaterska tekmovanja in diplome', 'questions': [(351, 362), (386, 387)]},\n",
    "    {'id': 17, 'title': 'Ham spirit in ARON', 'questions': [(363, 374)]},\n",
    "    {'id': 18, 'title': 'Fizika, elektrotehnika', 'questions': [(395, 454), (465, 492)]},\n",
    "    {'id': 19, 'title': 'Valovanje', 'questions': [(455, 464), (777, 804)]},\n",
    "    {'id': 20, 'title': 'Električna moč in izkoristek', 'questions': [(493, 504)]},\n",
    "    {'id': 21, 'title': 'Tuljave in kondenzatorji', 'questions': [(505, 541)]},\n",
    "    {'id': 22, 'title': 'Električni filtri', 'questions': [(542, 569)]},\n",
    "    # NOTE(review): 'elektronike' in the next title may be a typo for 'elektronke' - confirm\n",
    "    {'id': 23, 'title': 'Polprevodniki in elektronike', 'questions': [(570, 591)]},\n",
    "    {'id': 24, 'title': 'Mikrofon in zvočnik', 'questions': [(592, 594)]},\n",
    "    {'id': 25, 'title': 'Električni sklopi', 'questions': [(595, 609)]},\n",
    "    {'id': 26, 'title': 'Digitalna obdelava signalov', 'questions': [(610, 624)]},\n",
    "    {'id': 27, 'title': 'Radijski valovi in signali', 'questions': [(625, 668)]},\n",
    "    {'id': 28, 'title': 'Radijski oddajniki', 'questions': [(669, 707)]},\n",
    "    {'id': 29, 'title': 'Radijski sprejemniki', 'questions': [(708, 776)]},\n",
    "    {'id': 30, 'title': 'Razširjanje radijskih valov', 'questions': [(805, 835)]},\n",
    "    {'id': 31, 'title': 'Antene', 'questions': [(213, 233), (836, 868)]},\n",
    "    {'id': 32, 'title': 'Napajanje anten', 'questions': [(234, 238), (869, 900)]},\n",
    "    {'id': 33, 'title': 'Motnje', 'questions': [(901, 904)]},\n",
    "    {'id': 34, 'title': 'Varnost pri delu', 'questions': [(905, 908)]},\n",
    "    {'id': 35, 'title': 'Meritve', 'questions': [(909, 970)]},\n",
    "]\n",
    "\n",
    "# NOTE(review): the parsing cell writes '../assets/questions.json' while this cell\n",
    "# reads and rewrites '../public/questions.json' - confirm how the file gets there.\n",
    "with open('../public/questions.json', encoding='utf-8') as file:\n",
    "    q = json.load(file)\n",
    "\n",
    "# The file holds either a bare list of questions or an already processed\n",
    "# {'categories': ..., 'questions': ...} object; unwrap the latter.\n",
    "if type(q) is not list:\n",
    "    q = q['questions']\n",
    "\n",
    "# Tag every question with the first category whose inclusive id range contains it.\n",
    "# This runs for both input shapes, so the filter below always finds the 'category' key.\n",
    "for qa in q:\n",
    "    for c in categories:\n",
    "        if any(fr <= qa['id'] <= to for fr, to in c['questions']):\n",
    "            qa['category'] = c['id']\n",
    "            break\n",
    "\n",
    "# The saved categories keep only id and title; the id ranges are dropped.\n",
    "categories = [{'id': c['id'], 'title': c['title']} for c in categories]\n",
    "\n",
    "# Remove \"Risanje\"\n",
    "q = [qa for qa in q if qa['category'] != 9]\n",
    "categories = [c for c in categories if c['id'] != 9]\n",
    "\n",
    "q = {'categories': categories, 'questions': q}\n",
    "\n",
    "with open('../public/questions.json', 'w', encoding='utf-8') as file:\n",
    "    json.dump(q, file, ensure_ascii=False, indent=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Diagnostics over a {'categories': ..., 'questions': ...} file.\n",
    "# NOTE(review): this expects every category to still carry its 'questions' id ranges,\n",
    "# which the category cell drops before saving - confirm which file version is meant.\n",
    "with open('../assets/questions.json', encoding='utf-8') as file:\n",
    "    q = json.load(file)\n",
    "\n",
    "# Print questions that mention 'upor' and have an answer containing 'W'.\n",
    "for qq in q['questions']:\n",
    "    # 'answers' is null for questions that were parsed without answer options\n",
    "    if 'upor' in qq['question'] and any('W' in a for a in (qq['answers'] or [])):\n",
    "        print(qq)\n",
    "\n",
    "print('---')\n",
    "\n",
    "# Report every category range that overlaps a range seen earlier.\n",
    "cmp = set()\n",
    "for c in q['categories']:\n",
    "    for first, last in c['questions']:\n",
    "        ids = set(range(first, last + 1))\n",
    "        overlap = cmp.intersection(ids)\n",
    "        if overlap:\n",
    "            print(first, last, c['title'])\n",
    "            print(overlap)\n",
    "            print()\n",
    "        cmp.update(ids)\n",
    "\n",
    "print('---')\n",
    "\n",
    "# Report ids between 1 and the last question's id that no range covers.\n",
    "expected = set(range(1, q['questions'][-1]['id'] + 1))\n",
    "print(len(expected), len(cmp))\n",
    "if cmp != expected:\n",
    "    print(expected.difference(cmp))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.1"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "5238573367df39f7286bb46f9ff5f08f63a01a80960060ce41e3c79b190280fa"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}