s5_practice/scripts/parse.ipynb
2023-02-24 09:25:25 +01:00

348 lines
11 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Maps a question id to the index of its image. map_qa formats the index as\n",
"# 'image-NNN.png'; ids that are not listed here get no image (None).\n",
"# NOTE(review): 216 maps to 1 and index 3 is never used - confirm that 216 is\n",
"# meant to reuse image 1 rather than point at image 3.\n",
"imageRefs = {\n",
" 213: 0,\n",
" 214: 1,\n",
" 215: 2,\n",
" 216: 1,\n",
" 217: 4,\n",
" 218: 5,\n",
" 219: 6,\n",
" 220: 7,\n",
" 221: 8,\n",
" 222: 9,\n",
" 223: 10,\n",
" 224: 11,\n",
" 225: 12,\n",
" 226: 13,\n",
" 227: 14,\n",
" 228: 15,\n",
" 229: 16,\n",
" 230: 17,\n",
" 231: 18,\n",
" 232: 19,\n",
" 233: 20,\n",
" 234: 21,\n",
" 235: 22,\n",
" 236: 23,\n",
" 237: 24,\n",
" 238: 25,\n",
" 240: 26,\n",
" 241: 27,\n",
" 242: 28,\n",
" 243: 29,\n",
" 244: 30,\n",
" 245: 31,\n",
" 246: 32,\n",
" 247: 33,\n",
" 248: 34,\n",
" 249: 35,\n",
" 250: 36,\n",
" 256: 37,\n",
" 257: 38,\n",
" 258: 39,\n",
" 259: 40,\n",
" 260: 41,\n",
" 430: 77,\n",
" 431: 78,\n",
" 432: 79,\n",
" 433: 80,\n",
" 461: 81,\n",
" 462: 82,\n",
" 475: 83,\n",
" 476: 84,\n",
" 477: 85,\n",
" 478: 86,\n",
" 479: 87,\n",
" 482: 42,\n",
" 483: 43,\n",
" 484: 44,\n",
" 485: 45,\n",
" 486: 46,\n",
" 487: 47,\n",
" 488: 48,\n",
" 503: 49,\n",
" 504: 50,\n",
" 509: 51,\n",
" 510: 52,\n",
" 511: 53,\n",
" 518: 54,\n",
" 523: 55,\n",
" 524: 56,\n",
" 530: 57,\n",
" 531: 58,\n",
" 532: 59,\n",
" 533: 60,\n",
" 543: 61,\n",
" 544: 62,\n",
" 545: 63,\n",
" 546: 64,\n",
" 563: 65,\n",
" 564: 66,\n",
" 565: 67,\n",
" 574: 68,\n",
" 575: 69,\n",
" 577: 70,\n",
" 580: 71,\n",
" 585: 72,\n",
" 586: 73,\n",
" 587: 74,\n",
" 588: 75,\n",
" 589: 76,\n",
" 683: 88,\n",
" 692: 89,\n",
" 696: 90,\n",
" 697: 91,\n",
" 698: 92,\n",
" 704: 93,\n",
" 705: 94,\n",
" 707: 95,\n",
" 751: 96,\n",
" 752: 97,\n",
" 753: 98,\n",
" 760: 99,\n",
" 764: 100,\n",
" 766: 101,\n",
" 767: 102,\n",
" 769: 103,\n",
" 770: 104,\n",
" 845: 105,\n",
" 846: 106,\n",
" 848: 107,\n",
" 849: 108,\n",
" 851: 109,\n",
" 852: 110,\n",
" 853: 111,\n",
" 854: 112,\n",
" 855: 113,\n",
" 856: 114,\n",
" 885: 115,\n",
" 886: 116,\n",
" 887: 117,\n",
" 888: 118,\n",
" 889: 119,\n",
" 934: 120,\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Source text files; they are read and parsed in this order.\n",
"# NOTE(review): absolute, machine-specific paths - the cell only runs where\n",
"# these files exist.\n",
"f = [\n",
" 'C:/Users/Jakob/Documents/Ham/Izpitna vprašanja/A_01.txt',\n",
" 'C:/Users/Jakob/Documents/Ham/Izpitna vprašanja/A_02.txt',\n",
" 'C:/Users/Jakob/Documents/Ham/Izpitna vprašanja/A_03.txt',\n",
" 'C:/Users/Jakob/Documents/Ham/Izpitna vprašanja/A_04.txt',\n",
"]\n",
"\n",
"# Splits a file into questions: cut at a newline followed by '<digits>.'.\n",
"q_split = re.compile(r'\\n(?=\\d+\\.)')\n",
"# Splits text into lines, swallowing the whitespace that begins the next line.\n",
"ql_split = re.compile(r'\\n\\s*')\n",
"# Splits an answer block: cut at a newline followed by '<letter A-Z>.'.\n",
"a_split = re.compile(r'\\n\\s*(?=[A-Z]\\.)')\n",
"# Matches any character outside the allowed set; map_qa rejects a question\n",
"# that contains one.\n",
"# NOTE(review): the letters listed right after ',' look mis-encoded (possibly\n",
"# meant to be the Slovenian letters with carons) - verify before editing.\n",
"invalid_chars = re.compile(r'[^\\w\\s\\d\\n\\.?!:;,蚞ȊŽ/\"\\-\\(\\)\\[\\]»«…=*%+\\\\\\{\\}$∞<>]')\n",
"\n",
"def map_a(s: str):\n",
"    \"\"\"Split an answer block into its answer texts.\n",
"\n",
"    The block is cut before every line that starts with a capital letter and a\n",
"    dot, and whitespace runs inside each piece are collapsed to one space.\n",
"    Piece number i must start with the i-th letter (A, B, C, ...); otherwise an\n",
"    Exception carrying the whole block is raised. The first two characters (the\n",
"    label) and any trailing dots or spaces are removed from each answer.\n",
"    \"\"\"\n",
"    answers = []\n",
"    for position, piece in enumerate(a_split.split(s)):\n",
"        text = re.sub(r'\\s+', ' ', piece.strip())\n",
"        if ord(text[0]) != ord('A') + position:\n",
"            raise Exception(s)\n",
"        answers.append(text[2:].lstrip().rstrip('. '))\n",
"    return answers\n",
"\n",
"def map_q(s: str):\n",
"    \"\"\"Return (question text, zero-based index of the correct answer).\n",
"\n",
"    The last character of the first line is read as the answer letter and must\n",
"    be in A-Z, otherwise an Exception carrying the input is raised. The letter\n",
"    is removed from the first line and all lines are joined with single spaces.\n",
"    \"\"\"\n",
"    first, *rest = ql_split.split(s)\n",
"    letter = first[-1]\n",
"    if not 'A' <= letter <= 'Z':\n",
"        raise Exception(s)\n",
"    lines = [first[:-1].strip(), *rest]\n",
"    return ' '.join(lines), ord(letter) - ord('A')\n",
"\n",
"def map_qa(s: str):\n",
"    \"\"\"Parse one numbered question chunk into a record dict.\n",
"\n",
"    The text before the first '.' is the integer id. If a run of two or more\n",
"    newlines follows, the part before it goes to map_q and the part after it to\n",
"    map_a; otherwise the chunk is a question without answers and both 'answers'\n",
"    and 'correct' are None. 'image' is 'image-NNN.png' when imageRefs lists the\n",
"    id, else None. Raises Exception with the stripped chunk if it contains a\n",
"    character matched by invalid_chars.\n",
"    \"\"\"\n",
"    text = s.strip()\n",
"    if invalid_chars.search(text):\n",
"        raise Exception(text)\n",
"\n",
"    dot = text.find('.')\n",
"    number = int(text[:dot])\n",
"    body = text[dot + 1:].strip()\n",
"\n",
"    gap = re.search(r'\\n{2,}', body)\n",
"    if gap is not None:\n",
"        cut = gap.start()\n",
"        question, correct = map_q(body[:cut].strip())\n",
"        answers = map_a(body[cut + 2:].strip())\n",
"    else:\n",
"        question = ' '.join(ql_split.split(body))\n",
"        answers = correct = None\n",
"\n",
"    image_index = imageRefs.get(number)\n",
"    image = None if image_index is None else f'image-{image_index:03d}.png'\n",
"    return {\n",
"        'id': number,\n",
"        'question': question,\n",
"        'image': image,\n",
"        'answers': answers,\n",
"        'correct': correct,\n",
"    }\n",
"\n",
"# Parse every source file into one flat list of question records.\n",
"q = []\n",
"for ff in f:\n",
"    with open(ff, encoding='utf-8') as file:\n",
"        q.extend(map(map_qa, q_split.split(file.read())))\n",
"\n",
"# Ids must run 1..N without gaps across the concatenated files.\n",
"for expected_id, qa in enumerate(q, start=1):\n",
"    if qa['id'] != expected_id:\n",
"        # Report the record that failed the check. The message used to print\n",
"        # the previous record's id (the last record's id for the first one).\n",
"        raise Exception(f'{qa[\"id\"]} != {expected_id}')\n",
"\n",
"# A question that has answers must have exactly three of them.\n",
"for qa in q:\n",
"    if qa['answers'] is not None and len(qa['answers']) != 3:\n",
"        raise Exception(qa)\n",
"\n",
"print(len(q), 'questions')\n",
"with open('../assets/questions.json', 'w', encoding='utf-8') as file:\n",
"    json.dump(q, file, ensure_ascii=False, indent=2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Add categories\n",
"# Each entry has an id, a display title and 'questions': a list of inclusive\n",
"# (first_id, last_id) ranges of question ids that belong to the category.\n",
"# Id 8 is not used.\n",
"# Fixed the title typo 'Časocvne' -> 'Časovne' (category 14).\n",
"# NOTE(review): 'code' (category 3) and 'elektronike' (category 23) may be\n",
"# typos as well - confirm against the source material before changing them.\n",
"categories = [\n",
"    {'id': 1, 'title': 'Zgodovina radioamaterstva, IARU in ZRS', 'questions': [(1, 12)]},\n",
"    {'id': 2, 'title': 'Osnovni pojmi o radijskih komunikacijah', 'questions': [(13, 24)]},\n",
"    {'id': 3, 'title': 'Razdelitev frekvenc, moči, regioni, code', 'questions': [(25, 128), (239, 239)]},\n",
"    {'id': 4, 'title': 'Mednarodni in slovenski predpisi', 'questions': [(129, 149)]},\n",
"    {'id': 5, 'title': 'Q-kod in kratice', 'questions': [(150, 168), (173, 201), (251, 255), (261, 268), (299, 299)]},\n",
"    {'id': 6, 'title': 'Operaterstvo', 'questions': [(169, 172), (283, 298), (375, 385), (388, 394)]},\n",
"    {'id': 7, 'title': 'Prefiksi in sufiksi', 'questions': [(202, 212)]},\n",
"    {'id': 9, 'title': 'Risanje', 'questions': [(240, 250), (256, 260)]},\n",
"    {'id': 10, 'title': 'RST sistem', 'questions': [(269, 282)]},\n",
"    {'id': 11, 'title': 'Digitalne komunikacije', 'questions': [(300, 312)]},\n",
"    {'id': 12, 'title': 'Radioamaterski dnevnik', 'questions': [(313, 325)]},\n",
"    {'id': 13, 'title': 'QSL kartice', 'questions': [(326, 337)]},\n",
"    {'id': 14, 'title': 'Časovne cone in UTC', 'questions': [(338, 344)]},\n",
"    {'id': 15, 'title': 'Univerzalni lokator', 'questions': [(345, 350)]},\n",
"    {'id': 16, 'title': 'Radioamaterska tekmovanja in diplome', 'questions': [(351, 362), (386, 387)]},\n",
"    {'id': 17, 'title': 'Ham spirit in ARON', 'questions': [(363, 374)]},\n",
"    {'id': 18, 'title': 'Fizika, elektrotehnika', 'questions': [(395, 454), (465, 492)]},\n",
"    {'id': 19, 'title': 'Valovanje', 'questions': [(455, 464), (777, 804)]},\n",
"    {'id': 20, 'title': 'Električna moč in izkoristek', 'questions': [(493, 504)]},\n",
"    {'id': 21, 'title': 'Tuljave in kondenzatorji', 'questions': [(505, 541)]},\n",
"    {'id': 22, 'title': 'Električni filtri', 'questions': [(542, 569)]},\n",
"    {'id': 23, 'title': 'Polprevodniki in elektronike', 'questions': [(570, 591)]},\n",
"    {'id': 24, 'title': 'Mikrofon in zvočnik', 'questions': [(592, 594)]},\n",
"    {'id': 25, 'title': 'Električni sklopi', 'questions': [(595, 609)]},\n",
"    {'id': 26, 'title': 'Digitalna obdelava signalov', 'questions': [(610, 624)]},\n",
"    {'id': 27, 'title': 'Radijski valovi in signali', 'questions': [(625, 668)]},\n",
"    {'id': 28, 'title': 'Radijski oddajniki', 'questions': [(669, 707)]},\n",
"    {'id': 29, 'title': 'Radijski sprejemniki', 'questions': [(708, 776)]},\n",
"    {'id': 30, 'title': 'Razširjanje radijskih valov', 'questions': [(805, 835)]},\n",
"    {'id': 31, 'title': 'Antene', 'questions': [(213, 233), (836, 868)]},\n",
"    {'id': 32, 'title': 'Napajanje anten', 'questions': [(234, 238), (869, 900)]},\n",
"    {'id': 33, 'title': 'Motnje', 'questions': [(901, 904)]},\n",
"    {'id': 34, 'title': 'Varnost pri delu', 'questions': [(905, 908)]},\n",
"    {'id': 35, 'title': 'Meritve', 'questions': [(909, 970)]},\n",
"]\n",
"\n",
"with open('../public/questions.json', encoding='utf-8') as file:\n",
"    q = json.load(file)\n",
"\n",
"# The file holds either a bare list of questions or the wrapper written at the\n",
"# end of this cell ({'categories': ..., 'questions': ...}).\n",
"# NOTE(review): the parsing cell writes '../assets/questions.json', not this\n",
"# path - confirm which location is current.\n",
"if type(q) is not list:\n",
"    q = q['questions']\n",
"\n",
"# Tag every question, whichever format was loaded, with the first category\n",
"# whose inclusive (first, last) ranges contain its id. This loop used to run\n",
"# only for the wrapper format, so a bare list reached the filter below without\n",
"# any 'category' key and failed with KeyError.\n",
"for question in q:\n",
"    for c in categories:\n",
"        if any(fr <= question['id'] <= to for fr, to in c['questions']):\n",
"            question['category'] = c['id']\n",
"            break\n",
"\n",
"# Drop the 'Risanje' category (id 9) together with its questions.\n",
"q = [x for x in q if x['category'] != 9]\n",
"\n",
"# Only id and title are published. `categories` itself keeps its ranges so that\n",
"# this cell can be run again without re-creating the list.\n",
"published_categories = [\n",
"    {'id': c['id'], 'title': c['title']} for c in categories if c['id'] != 9\n",
"]\n",
"\n",
"q = {'categories': published_categories, 'questions': q}\n",
"\n",
"with open('../public/questions.json', 'w', encoding='utf-8') as file:\n",
"    json.dump(q, file, ensure_ascii=False, indent=2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('../assets/questions.json', encoding='utf-8') as file:\n",
"    q = json.load(file)\n",
"\n",
"# Print questions that mention 'upor' and offer an answer containing 'W'.\n",
"for qq in q['questions']:\n",
"    # Questions parsed without an answer block carry answers = None; treat that\n",
"    # as no answers instead of failing on iteration.\n",
"    answers = qq['answers'] or []\n",
"    if 'upor' in qq['question'] and any('W' in a for a in answers):\n",
"        print(qq)\n",
"\n",
"print('---')\n",
"\n",
"# Print every category range that overlaps ids already claimed by an earlier\n",
"# range. Loop names no longer rebind `f` (the source-file list) or shadow the\n",
"# builtin `all`.\n",
"# NOTE(review): this needs each stored category to carry its 'questions'\n",
"# ranges, but the categories cell saves only id and title - confirm which file\n",
"# version this check is meant for.\n",
"cmp = set()\n",
"for c in q['categories']:\n",
"    for first, last in c['questions']:\n",
"        ids = set(range(first, last + 1))\n",
"        overlap = cmp & ids\n",
"        if overlap:\n",
"            print(first, last, c['title'])\n",
"            print(overlap)\n",
"            print()\n",
"        cmp |= ids\n",
"\n",
"print('---')\n",
"\n",
"# Print the ids in 1..<id of the last question> that no range covers.\n",
"expected = set(range(1, q['questions'][-1]['id'] + 1))\n",
"print(len(expected), len(cmp))\n",
"if cmp != expected:\n",
"    print(expected - cmp)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.1"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "5238573367df39f7286bb46f9ff5f08f63a01a80960060ce41e3c79b190280fa"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}