Compact prefix trie

This commit is contained in:
Jakob Kordež 2024-06-27 21:46:34 +02:00
parent e43aac8fd0
commit ebc183b1a2
7 changed files with 23213 additions and 34870 deletions

View File

@ -23,67 +23,18 @@ const doc: IClublogFile = await parseStringPromise(file);
const now = new Date();
const prefixes: { call: string; entity: number }[] = [];
const prefixes: [string, number][] = [];
for (const prefix of doc.clublog.prefixes[0].prefix) {
const end = prefix.end?.[0];
if (end && new Date(end) < now) continue;
prefixes.push({
call: prefix.call[0],
entity: parseInt(prefix.adif[0])
});
prefixes.push([prefix.call[0], parseInt(prefix.adif[0])]);
}
console.log('Parsed', prefixes.length, 'prefixes');
// Build the initial trie
import { TrieNode } from '../src/lib/models/trie';
import { fullBuildTrie } from './parser-helper';
const root = new TrieNode();
for (const { call, entity } of prefixes) {
root.insert(call, entity);
}
// Merge as many nodes as possible
const nodes = new Map([...root.getAllNodes()].map((node) => [node.id, node]));
console.log('Starting merge with', nodes.size, 'nodes');
// Bad merge algorithm, but it works
let anyChanged = true;
while (anyChanged) {
anyChanged = false;
for (const a of nodes.values()) {
if (!nodes.has(a.id)) continue;
for (const b of nodes.values()) {
if (a === b) continue;
if (!nodes.has(b.id)) continue;
if (a.canMerge(b)) {
for (const node of nodes.values()) {
for (const [k, v] of node.children) {
if (v === b) {
node.children.set(k, a);
}
}
}
nodes.delete(b.id);
anyChanged = true;
}
}
}
}
console.log('Finished merge with', nodes.size, 'nodes');
// Validate the trie
for (const { call, entity } of prefixes) {
if (root.findRaw(call)?.entity !== entity) {
console.error('Failed to find', call, entity);
}
}
// Minimize node IDs
let i = 0;
for (const node of root.getAllNodes()) {
node.id = i++;
}
const root = fullBuildTrie(prefixes);
// Output the trie
const out = root.encodeToString();
@ -103,6 +54,7 @@ for (const entity of doc.clublog.entities[0].entity) {
if (end && new Date(end) < now) continue;
entities.push({
id,
dxcc: id,
name,
cqz: cqz ? parseInt(cqz) : undefined,
cont: cont ? cont : undefined

View File

@ -4,6 +4,7 @@ import { TrieNode } from '../src/lib/models/trie';
export function fullBuildTrie(prefixes: [string, number][]): TrieNode {
const root = buildTrie(prefixes);
collapseNodes(root);
buildShortcuts(root);
mergeNodes(root);
minimizeIds(root);
validateTrie(root, prefixes);
@ -29,12 +30,20 @@ export function collapseNodes(root: TrieNode): void {
console.log('Collapsed trie with', root.getAllNodes().size, 'nodes');
}
/**
 * Runs `TrieNode.buildShortcuts` on every node reachable from `root`, then
 * logs how many nodes are reachable once all shortcuts have been built.
 *
 * @param root - Root of the trie to process in place.
 */
export function buildShortcuts(root: TrieNode): void {
  // The node set is collected once up front, so edges rewritten by one node
  // do not affect which nodes get visited.
  root.getAllNodes().forEach((node) => node.buildShortcuts());

  const reachable = root.getAllNodes().size;
  console.log('Built shortcuts with', reachable, 'nodes');
}
export function mergeNodes(root: TrieNode): void {
const nodes = new Map([...root.getAllNodes()].map((node) => [node.id, node]));
const parents: Map<number, TrieNode[]> = new Map();
for (const node of nodes.values()) {
for (const child of node.children.values()) {
for (const child of [...node.children.values(), ...node.shortcuts.values()]) {
const list = parents.get(child.id) ?? [];
list.push(node);
parents.set(child.id, list);
@ -54,16 +63,17 @@ export function mergeNodes(root: TrieNode): void {
continue;
}
if (!existing.canMerge(node)) {
throw new Error('Merge conflict false positive');
}
for (const parent of parents.get(node.id) ?? []) {
for (const [k, v] of parent.children) {
if (v === node) {
parent.children.set(k, existing);
}
}
for (const [k, v] of parent.shortcuts) {
if (v === node) {
parent.shortcuts.set(k, existing);
}
}
}
parents.delete(node.id);
nodes.delete(node.id);
@ -78,36 +88,21 @@ export function validateTrie(root: TrieNode, prefixes: [string, number][]): void
for (const [callRaw, entity] of prefixes) {
const [, call, overridesRaw] = callRaw.match(/^=?((?:[A-Z\d/])+)(.*)/)!;
const isExact = callRaw.startsWith('=');
let node: TrieNode | null = root;
let currentEntity: number | null = null;
let currentOverrides: DxccOverrides = new DxccOverrides();
let endOfCall = true;
for (const c of call) {
node = node.children.get(c) ?? null;
if (!node) {
endOfCall = false;
break;
}
currentEntity = node.entity ?? currentEntity;
currentOverrides = currentOverrides.merge(node.overrides);
}
if (endOfCall && isExact) {
node = node?.children.get('') ?? null;
currentEntity = node?.entity ?? currentEntity;
currentOverrides = currentOverrides.merge(node?.overrides ?? null);
}
const node = root.findDxcc(call + (isExact ? '' : ' '));
if (currentEntity !== entity) {
console.error('Failed to find', call, entity);
console.log('Found', currentEntity);
if (node?.entityId !== entity) {
console.log('Failed to find', call, entity);
console.log('Found', node?.entityId);
}
const overrides = DxccOverrides.fromString(overridesRaw);
if (!overrides.isSubsetOf(currentOverrides)) {
console.error('Overrides do not match', call, overridesRaw);
console.log('Found', currentOverrides?.toString());
if (!overrides.isSubsetOf(node?.dxccOverrides)) {
console.log('Overrides do not match', call, overridesRaw);
console.log('Found', node?.dxccOverrides.toString());
}
}
console.log('Finished validation');
}
export function minimizeIds(root: TrieNode): void {
@ -115,6 +110,8 @@ export function minimizeIds(root: TrieNode): void {
for (const node of root.getAllNodes()) {
node.id = i++;
}
console.log('Minimized ids with', root.getAllNodes().size, 'nodes');
}
interface IEntity {

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,6 @@ import dxccTreeFile from '../assets/dxcc-tree.txt?raw';
import dxccEntitiesFile from '../assets/dxcc-entities.json';
import { TrieNode } from './models/trie';
import type { DxccEntity } from './models/dxcc-entity';
import { DxccOverrides } from './models/dxcc-overrides';
export const dxccTree = TrieNode.decodeFromString(dxccTreeFile);
@ -13,8 +12,8 @@ export interface DxccResult {
}
export function findDxcc(prefix: string, startingNode: TrieNode = dxccTree): DxccResult | null {
const rawResult = findRawDxcc(prefix, startingNode);
if (!rawResult) return null;
const rawResult = startingNode.findDxcc(prefix.toUpperCase());
if (!rawResult || rawResult.entityId === null) return null;
const entity = dxccEntities.get(rawResult.entityId);
if (!entity) return null;
@ -36,59 +35,6 @@ export function findDxcc(prefix: string, startingNode: TrieNode = dxccTree): Dxc
};
}
/**
 * Result of a raw trie lookup, before the entity id is resolved to a full
 * entity record.
 */
export interface RawDxccResult {
// Entity id taken from the deepest matching node that carries one (or from the exact-match node).
entityId: number;
// Overrides accumulated along the matched path; nodes visited later take precedence.
dxccOverrides: DxccOverrides;
// Number of leading prefix characters that contributed to the result.
matchLength: number;
// True when the result came from an exact-match (empty-key) child node.
isExact: boolean;
}
/**
 * Looks up `prefix` in the trie one character at a time, without resolving the
 * entity id to an entity record.
 *
 * The input is upper-cased, then the trie is followed through `children` for
 * as long as an edge exists for the next character. Every visited node that
 * has an entity replaces the current entity id; every visited node with
 * non-empty overrides is merged into the accumulated overrides.
 *
 * @param prefix - Callsign or prefix to look up (any letter case).
 * @param startingNode - Node to start walking from; defaults to the DXCC tree.
 * @returns The exact-match result when the whole input was consumed and the
 *   final node has an empty-key child; otherwise the best prefix match, or
 *   `null` when no visited node supplied an entity.
 */
export function findRawDxcc(
  prefix: string,
  startingNode: TrieNode = dxccTree
): RawDxccResult | null {
  const call = prefix.toUpperCase();

  let cursor = startingNode;
  let entityId: number | null = null;
  let dxccOverrides = new DxccOverrides();
  let consumed = 0; // characters walked so far
  let matchLength = 0; // characters up to the last node that contributed data

  while (consumed < call.length) {
    const child = cursor.children.get(call.charAt(consumed));
    if (!child) {
      break;
    }
    cursor = child;
    consumed += 1;

    if (cursor.entity) {
      entityId = cursor.entity;
      matchLength = consumed;
    }
    if (cursor.overrides.toString()) {
      dxccOverrides = dxccOverrides.merge(cursor.overrides);
      // TODO Debate whether to set matchLength here
      matchLength = consumed;
    }
  }

  const wholeCallConsumed = consumed === call.length;
  if (wholeCallConsumed && cursor.children.has('')) {
    const exact = cursor.children.get('')!;
    return {
      entityId: exact.entity ?? entityId!,
      dxccOverrides: dxccOverrides.merge(exact.overrides),
      // An exact hit always reports the full walked length
      // (alternative considered: exact.entity ? consumed : matchLength).
      matchLength: consumed,
      isExact: true
    };
  }

  return entityId ? { entityId, dxccOverrides, matchLength, isExact: false } : null;
}
export const dxccEntities: Map<number, DxccEntity> = new Map(
[...dxccEntitiesFile].map((e) => [e.id, e])
);

View File

@ -81,7 +81,10 @@ export class DxccOverrides {
);
}
merge(other: DxccOverrides | null): DxccOverrides {
/**
* Other takes precedence over this.
*/
merge(other?: DxccOverrides | null): DxccOverrides {
if (!other) return this;
return new DxccOverrides({
cqz: other.cqz ?? this.cqz,

View File

@ -7,13 +7,10 @@ describe('parseString', () => {
31
-YAP-4
-X-3
3(32)
=401
3=401(32)
--700
4
=400
700
=500
4=400
700=500
`;
const root = TrieNode.decodeFromString(encoded);

View File

@ -7,6 +7,7 @@ export class TrieNode {
public entity: number | null;
public overrides: DxccOverrides;
public children: Map<string, TrieNode>;
public shortcuts: Map<string, TrieNode>;
constructor({
id,
@ -24,6 +25,7 @@ export class TrieNode {
this.entity = entity ?? null;
this.children = children ?? new Map();
this.overrides = overrides ?? new DxccOverrides();
this.shortcuts = new Map();
}
/**
@ -52,7 +54,7 @@ export class TrieNode {
`Overrides conflict: ${JSON.stringify(this.overrides)} vs ${JSON.stringify(overrides)}`
);
}
this.overrides = this.overrides.merge(overrides ?? null);
this.overrides = this.overrides.merge(overrides);
return;
}
@ -73,12 +75,70 @@ export class TrieNode {
return next ? next.findRaw(prefix.slice(1)) : null;
}
/**
 * Follows a single edge out of this node for the given remaining prefix.
 *
 * Lookup order:
 * 1. empty prefix: the exact-match child stored under the empty key;
 * 2. the child stored under the first character;
 * 3. the first shortcut (in map order) whose key is a prefix of the input.
 *
 * @param prefix - Remaining, not yet consumed part of the callsign.
 * @returns The node reached and how many characters the edge consumed, or
 *   `null` when no edge matches.
 */
step(prefix: string): { node: TrieNode; length: number } | null {
  if (prefix === '') {
    const terminal = this.children.get('');
    if (!terminal) return null;
    return { node: terminal, length: 0 };
  }

  // Single-character edges take priority over multi-character shortcuts.
  const direct = this.children.get(prefix.charAt(0));
  if (direct) {
    return { node: direct, length: 1 };
  }

  const shortcut = [...this.shortcuts].find(([label]) => prefix.startsWith(label));
  return shortcut ? { node: shortcut[1], length: shortcut[0].length } : null;
}
/**
 * Recursively resolves the DXCC data for `prefix`, starting at this node.
 *
 * Data found deeper in the trie wins: a deeper entity id replaces this node's
 * entity, and deeper overrides are merged on top of this node's overrides.
 *
 * @param prefix - Remaining part of the callsign; callers are expected to pass
 *   it already upper-cased.
 * @returns Always a result object; `entityId` is `null` when neither this node
 *   nor any node on the matched path carries an entity.
 */
findDxcc(prefix: string): RawDxccResult {
  // Input exhausted: only the exact-match child (empty key) can still apply.
  if (prefix.length === 0) {
    const terminal = this.children.get('');
    return {
      entityId: terminal?.entity ?? this.entity,
      dxccOverrides: this.overrides.merge(terminal?.overrides),
      matchLength: 0,
      isExact: Boolean(terminal)
    };
  }

  const hop = this.step(prefix);
  if (hop === null) {
    // Dead end: report whatever this node itself knows.
    return {
      entityId: this.entity,
      dxccOverrides: this.overrides,
      matchLength: 0,
      isExact: false
    };
  }

  const deeper = hop.node.findDxcc(prefix.slice(hop.length));

  // The characters of this edge only count towards the match length when the
  // subtree behind it actually supplied an entity or overrides.
  const contributed = Boolean(deeper.dxccOverrides.toString() || deeper.entityId);
  const edgeLength = contributed ? hop.length : 0;

  return {
    dxccOverrides: this.overrides.merge(deeper.dxccOverrides),
    entityId: deeper.entityId ?? this.entity,
    isExact: deeper.isExact,
    matchLength: deeper.matchLength + edgeLength
  };
}
/**
* Returns all the nodes in the trie.
*/
getAllNodes(): Set<TrieNode> {
const nodes: TrieNode[] = [this];
for (const child of this.children.values()) {
for (const child of [...this.children.values(), ...this.shortcuts.values()]) {
nodes.push(...child.getAllNodes());
}
return new Set(nodes);
@ -117,11 +177,44 @@ export class TrieNode {
return this.children.size == 0 && !this.entity && !this.overrides.toString();
}
/**
 * Replaces chains of pass-through nodes below this node with multi-character
 * shortcut edges.
 *
 * A child qualifies when it is stored under exactly one non-empty key of this
 * node and has exactly one outgoing edge (child or shortcut) itself. Starting
 * at that child, the chain is followed while the current node has a single
 * outgoing edge, no entity and no overrides; the keys along the way are
 * concatenated. The resulting edge is stored in `shortcuts` and the original
 * single-character entry is removed from `children`.
 *
 * Note that a qualifying child is moved to `shortcuts` even when the chain
 * cannot be extended; the shortcut key is then just the original character.
 */
buildShortcuts(): void {
  // Iterate over a snapshot because `children` is mutated below.
  for (const child of new Set(this.children.values())) {
    // Find the key this child is stored under and detect children that are
    // reachable through more than one key. The comparison is against `null`
    // explicitly: the empty string is a valid key (the exact-match marker)
    // and must not be mistaken for "no key found yet".
    let k: string | null = null;
    let sharedByKeys = false;
    for (const [key, value] of this.children.entries()) {
      if (value !== child) continue;
      if (k !== null) {
        sharedByKeys = true;
        break;
      }
      k = key;
    }

    // Skip children with several keys, and never turn the exact-match edge
    // ('') into a shortcut: `step` looks it up in `children`.
    if (sharedByKeys || k === null || k === '') continue;
    if (child.children.size + child.shortcuts.size !== 1) continue;

    let curr = child;
    let stack = k;
    while (
      curr.children.size + curr.shortcuts.size === 1 &&
      !curr.entity &&
      !curr.overrides.toString()
    ) {
      const [key, value] = [...curr.children.entries(), ...curr.shortcuts.entries()][0];
      // Stop in front of an exact-match edge; it has to stay a real child.
      if (key === '') break;
      curr = value;
      stack += key;
    }

    this.shortcuts.set(stack, curr);
    this.children.delete(k);
  }
}
/**
* Generate a hash for merging nodes.
*/
hash(): string {
const children = [...this.children.entries()]
const children = [...this.children.entries(), ...this.shortcuts.entries()]
.map(([k, v]) => `${k}:${v.id}`)
.sort()
.join(',');
@ -129,23 +222,6 @@ export class TrieNode {
return `${this.entity ?? ''}_${children}_${overrides}`;
}
/**
 * Tells whether `other` is a distinct node that is interchangeable with this
 * one: same entity, equal overrides, and for every key present in either node
 * both nodes point at the very same child instance.
 *
 * @param other - Candidate node to merge with.
 * @returns `true` when the two nodes can be merged; `false` for the node
 *   itself or on any difference.
 */
canMerge(other: TrieNode): boolean {
  const sameData =
    this !== other &&
    this.entity === other.entity &&
    this.overrides.isEqual(other.overrides);
  if (!sameData) return false;

  // A key missing on one side yields `undefined` there and therefore differs.
  const allKeys = new Set([...this.children.keys(), ...other.children.keys()]);
  return [...allKeys].every((key) => this.children.get(key) === other.children.get(key));
}
/**
* Returns an encoded string of the whole trie.
*/
@ -157,7 +233,7 @@ export class TrieNode {
const overrides = this.overrides?.toString() ?? '';
const s = [`${this.id}${overrides}`];
if (this.entity) {
s.push(`=${this.entity}`);
s[0] += `=${this.entity}`;
}
for (const c of new Set(this.children.values())) {
const chars = [];
@ -167,6 +243,9 @@ export class TrieNode {
chars.sort();
s.push(`-${chars.join('')}-${c.id}`);
}
for (const [k, v] of this.shortcuts.entries()) {
s.push(`>${k}-${v.id}`);
}
return s.join('\n');
}
@ -191,19 +270,23 @@ export class TrieNode {
for (let line of s.trim().split('\n')) {
line = line.trim();
if (!line) continue;
if (line.startsWith('=')) {
const entity = line.slice(1);
currentNode!.entity = parseInt(entity);
} else if (line.startsWith('-')) {
if (line.startsWith('-')) {
const [, chars, child] = line.split('-');
const childNode = getNode(parseInt(child));
if (chars === '') currentNode!.children.set('', childNode);
for (const char of chars) {
currentNode!.children.set(char, childNode);
}
} else if (line.startsWith('>')) {
const [shortcut, child] = line.slice(1).split('-');
const childNode = getNode(parseInt(child));
currentNode!.shortcuts.set(shortcut, childNode);
} else {
currentNode = getNode(parseInt(line));
const overrides = /\d+(.*)/.exec(line)?.[1];
const match = /\d+(=\d+)?(.*)/.exec(line);
const entity = match?.[1];
if (entity) currentNode.entity = parseInt(entity.slice(1));
const overrides = match?.[2];
if (overrides) currentNode.overrides = DxccOverrides.fromString(overrides);
}
}
@ -211,3 +294,10 @@ export class TrieNode {
return root!;
}
}
/**
 * Result of `TrieNode.findDxcc`: the data collected along the matched path,
 * before the entity id is resolved to an entity record.
 */
export interface RawDxccResult {
// Entity of the deepest node on the matched path that has one; null when no node on the path has an entity.
entityId: number | null;
// Overrides merged along the matched path; deeper nodes take precedence over shallower ones.
dxccOverrides: DxccOverrides;
// Number of prefix characters on edges whose subtree supplied an entity or overrides.
matchLength: number;
// True when the whole prefix was consumed and the final node has an exact-match (empty-key) child.
isExact: boolean;
}