Memelang v8
Memelang is an AI-optimized language for querying structured data, knowledge graphs, and retrieval-augmented generation pipelines. The Python script below is designed to teach Memelang to your AI.
Copy all of the code below and paste it into your AI.
'''
Memelang v8.14 | info@memelang.net | (c) HOLTWORK LLC | Patents Pending
This script is optimized for training LLMs
1. MEMELANG USES AXES, LIMIT_AXIS HIGH -> LOW
| AXIS | SQL ANALOG | RDF ANALOG |
| ---: | ----------- | ----------- |
| 3 | Table | Graph |
| 2 | Primary Key | Subject |
| 1 | Column | Predicate |
| 0 | Value | Object |
2. EXAMPLE QUERY
MEMELANG: movies * actor "Mark Hamill",Mark ; movie * ; rating >4 ;;
SQL: SELECT rowid, actor, movie, rating FROM movies WHERE rowid=* AND actor IN ("Mark Hamill", "Mark") AND movie=* AND rating>4
RDF: SELECT … WHERE { GRAPH {?s actor ?o . FILTER(?o IN ("Mark Hamill","Mark")) . ?s movie ?x . ?s rating ?r . FILTER(?r > 4)} }
3. VARIABLE EXAMPLE ACTOR NAME = MOVIE TITLE
MEMELANG: movies * actor $x=* ; movie $x ;;
SQL: SELECT rowid, actor, movie FROM movies WHERE actor=movie
4. EXAMPLE JOIN QUERY
MEMELANG: movies * actor "Mark Hamill" ; movie * ; ~ @ @ ; actor * ;;
MEMELANG: movies $rowid=* actor "Mark Hamill" ; movie * ; !$rowid @ @ ; actor !"Mark Hamill" ;;
SQL: SELECT co.rowid, co.movie, co.actor FROM movies AS mh JOIN movies AS co ON co.movie=mh.movie AND co.rowid!=mh.rowid WHERE mh.actor='Mark Hamill';
RDF: SELECT ?coActor WHERE { GRAPH { ?mhRow ex:actor "Mark Hamill" ; ex:movie ?movie . ?coRow ex:movie ?movie ; ex:actor ?coActor . FILTER ( ?coRow != ?mhRow ) } }
'''
import random, re, json, operator
from typing import List, Iterator, Iterable, Dict, Tuple, Any, Union
Axis, Memelang, SQL = int, str, str
TBL, ROW, COL, VAL = Axis(3), Axis(2), Axis(1), Axis(0)
SIGIL, WILD, MSAME, VSAME, VDIFF, EMPTY, EOF = '$', '*', '^', '@', '~', '_', None
SEP_LIMIT, SEP_DATA, SEP_VCTR, SEP_MTRX = ' ', ',', ';', ';;'
SEP_VCTR_PRETTY, SEP_MTRX_PRETTY = ' ; ', ' ;;\n'
TOKEN_KIND_PATTERNS = (
('COMMENT', r'//[^\n]*'),
('QUOTE', r'"(?:[^"\\]|\\.)*"'), # ALWAYS JSON QUOTE ESCAPE EXOTIC CHARS name="John \"Jack\" Kennedy"
('META', r"'[^']*'"),
('IGNORE', r'-*\|'),
('SEP_MTRX', re.escape(SEP_MTRX)), # MTRX DISJUNCTION, AXIS=0
('SEP_VCTR', re.escape(SEP_VCTR)), # VCTR CONJUNCTION, AXIS=0
('SEP_LIMIT', r'\s+'), # LIMIT CONJUNCTION, AXIS-=1
('SEP_DATA', re.escape(SEP_DATA)), # DATUM DISJUNCTION, AXIS SAME
('GE', r'>='),
('LE', r'<='),
('EQL', r'='),
('NOT', r'!'),
('GT', r'>'),
('LT', r'<'),
('WILD', re.escape(WILD)), # WILDCARD, MATCHES WHOLE VALUE, NEVER QUOTE
('MSAME', re.escape(MSAME)), # REFERENCES (MTRX_AXIS-1, VCTR_AXIS=-1, LIMIT_AXIS)
('VSAME', re.escape(VSAME)), # REFERENCES (MTRX_AXIS, VCTR_AXIS-1, LIMIT_AXIS)
('VDIFF', re.escape(VDIFF)), # ANTI-REFERENCES (MTRX_AXIS, VCTR_AXIS-1, LIMIT_AXIS)
('EMPTY', re.escape(EMPTY)), # EMPTY SET, ANTI-WILD
('VAR', rf'\$[A-Za-z0-9]+'),
('ALNUM', r'[A-Za-z][A-Za-z0-9]*'), # ALPHANUMERICS ARE UNQUOTED
('FLOAT', r'-?\d*\.\d+'),
('INT', r'-?\d+'),
('MISMATCH', r'.'),
)
MASTER_PATTERN = re.compile('|'.join(f'(?P<{kind}>{pat})' for kind, pat in TOKEN_KIND_PATTERNS))
OPR_DICT = {'EQL': operator.eq, 'NOT': operator.ne, 'GT': operator.gt, 'GE': operator.ge, 'LT': operator.lt, 'LE': operator.le}
OPR_DATA_KINDS = {'EQL','NOT'}
SEP_KINDS = {'SEP_MTRX','SEP_VCTR','SEP_LIMIT','SEP_DATA',EOF}
SUGAR_KINDS = {'VDIFF', 'WILD'}
DATA_KINDS = {'ALNUM', 'QUOTE', 'INT', 'FLOAT', 'VAR', 'VSAME', 'MSAME', 'EMPTY'} # NEVER VDIFF OR WILD IN MULTI-DATA LIST
UNITARY_KINDS = {'ALNUM', 'QUOTE', 'INT', 'FLOAT', 'EQL', 'DATUM', 'NOVAR', 'VSAME', 'MSAME'}
class Token():
kind: str
lexeme: str
datum: Union[str, float, int]
def __init__(self, kind: str, lexeme: str):
self.kind = kind
self.lexeme = lexeme
if kind == 'QUOTE': self.datum = json.loads(lexeme)
elif kind == 'FLOAT': self.datum = float(lexeme)
elif kind == 'INT': self.datum = int(lexeme)
else: self.datum = lexeme
@property
def unitary(self) -> bool: return self.kind in UNITARY_KINDS
def dump(self) -> str: return self.datum
def __str__(self) -> Memelang: return self.lexeme
TOK_EQL = Token('EQL', '') # ELIDED '='
TOK_DATUM = Token('DATUM', '') # ELIDED
TOK_NOVAR = Token('NOVAR', '') # ELIDED
TOK_NOT = Token('NOT', '!')
TOK_GT = Token('GT', '>')
TOK_SEP_DATA = Token('SEP_DATA', SEP_DATA)
TOK_SEP_LIMIT = Token('SEP_LIMIT', SEP_LIMIT)
TOK_SEP_VCTR = Token('SEP_VCTR', SEP_VCTR)
TOK_SEP_MTRX = Token('SEP_MTRX', SEP_MTRX)
class Stream:
def __init__(self, token: Iterable[Token]):
self.token: Iterator[Token] = iter(token)
self.buffer: List[Token] = []
	def peek(self, fwd: int = 1) -> Union[str, None]:
		while len(self.buffer) < fwd:
			val = next(self.token, EOF)
			if val is EOF: return EOF
			self.buffer.append(val)
		return self.buffer[fwd-1].kind
	def next(self) -> Token:
if not self.buffer:
val = next(self.token, EOF)
if val is EOF: raise SyntaxError('E_EOF')
self.buffer.append(val)
return self.buffer.pop(0)
class Olist(list):
opr: Token = TOK_EQL
def __init__(self, *items: Union['Olist', Token], opr:Token|None = None):
super().__init__(items)
if opr is not None: self.opr = opr
def prepend(self, item):
self.insert(0, item)
def pad(self, padding:Union['Olist', Token]) -> None:
max_len = len(self[0])
for idx, item in enumerate(self):
diff = max_len - len(item)
			if diff>0: self[idx].extend([padding] * diff) # PAD MISSING HIGH AXES (ROW/TBL) AT THE END, INDEX == AXIS
elif diff<0: raise SyntaxError('E_PAD') # FIRST MUST BE LONGEST
@property
def unitary(self) -> bool: return self.opr.unitary and all(item.unitary for item in self)
def dump(self) -> List: return [self.opr.dump(), [item.dump() for item in self]]
def check(self) -> 'Olist':
if len(self)==0: raise SyntaxError('E_NO_LIST')
return self
def __str__(self) -> Memelang: return self.opr.lexeme.join(map(str, self))
class Data(Olist):
opr: Token = TOK_DATUM
class Limit(Olist):
opr: Token = TOK_EQL # ELIDED '='
class Vector(Olist):
opr: Token = TOK_SEP_LIMIT
class Matrix(Olist):
opr: Token = TOK_SEP_VCTR
DATA_MSAME = Data(Token('MSAME', MSAME))
DATA_VSAME = Data(Token('VSAME', VSAME))
DATA_EMPTY = Data(Token('EMPTY', EMPTY))
LIMIT_EQL_VSAME = Limit(TOK_NOVAR, DATA_VSAME, opr=TOK_EQL)
def lex(src: Memelang) -> Iterator[Token]:
for m in MASTER_PATTERN.finditer(src):
kind = m.lastgroup
if kind in {'COMMENT','META','IGNORE'}: continue
if kind == 'MISMATCH': raise SyntaxError('E_TOK')
yield Token(kind, m.group())
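# Illustrative sketch with a hypothetical query (not part of the original API): lex() yields one Token per lexeme.
# Expected kinds here: ALNUM, SEP_LIMIT, WILD, SEP_LIMIT, ALNUM, SEP_LIMIT, GT, INT, SEP_LIMIT, SEP_MTRX.
DEMO_LEX = [(t.kind, t.lexeme) for t in lex('movies * rating >4 ;;')]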
def parse(src: Memelang) -> Iterator[Matrix]:
tokens = Stream(lex(src))
bound_vars = []
mtrx=Matrix()
vctr=Vector()
limit=Limit()
while tokens.peek():
# LIMIT ::= [[VAR] OPR] DATUM {SEP_DATA DATUM}
# Single axis constraint
# [VAR]
var = TOK_NOVAR
if tokens.peek() == 'VAR':
if tokens.peek(2) in OPR_DICT:
var = tokens.next()
bound_vars.append(var.lexeme)
elif tokens.peek(2) not in SEP_KINDS: raise SyntaxError('E_VAR_NXT')
# [OPR]
if tokens.peek() in OPR_DICT:
limit.opr=tokens.next()
if tokens.peek()=='SEP_LIMIT': raise SyntaxError('E_NEVER_SPACE_AFTER_OPR')
if tokens.peek() not in DATA_KINDS|SUGAR_KINDS: raise SyntaxError('E_OPR_DAT')
# DATUM {SEP_DATA DATUM}
if tokens.peek() in DATA_KINDS|SUGAR_KINDS:
data=Data()
data.append(tokens.next())
while tokens.peek()=='SEP_DATA':
data.opr = tokens.next()
if tokens.peek()=='SEP_LIMIT': raise SyntaxError('E_NEVER_SPACE_AFTER_COMMA')
if tokens.peek() not in DATA_KINDS: raise SyntaxError('E_DATA_KIND')
data.append(tokens.next())
# LOGIC CHECKS
if any(t.kind == 'VAR' and t.lexeme not in bound_vars for t in data): raise SyntaxError('E_VAR_UNDEF')
if len(mtrx)==0 and any(t.kind == 'VSAME' for t in data): raise SyntaxError('E_VSAME_OOB')
if len(data)>1 and any(t.kind in SUGAR_KINDS for t in data): raise SyntaxError('E_DATA_KIND')
if len(data)>1 and limit.opr.kind not in OPR_DATA_KINDS: raise SyntaxError('E_DATA_OPR')
# FINALIZE LIMIT
limit.append(var)
limit.append(data)
vctr.prepend(limit.check()) # LIMIT_AXIS: HIGH -> LOW
limit=Limit()
continue
# VCTR ::= LIMIT {SEP_LIMIT LIMIT}
# Conjunctive vector of axis constraints
if tokens.peek() == 'SEP_VCTR':
if vctr: mtrx.append(vctr.check())
vctr = Vector()
tokens.next()
continue
# MTRX ::= VCTR {SEP_VCTR VCTR}
# Conjunctive matrix of axis constraints
if tokens.peek() == 'SEP_MTRX':
if vctr: mtrx.append(vctr.check())
if mtrx: yield mtrx.check()
vctr = Vector()
mtrx = Matrix()
tokens.next()
continue
if tokens.peek() == 'SEP_LIMIT':
tokens.next()
continue
raise SyntaxError('E_TOK')
if vctr:
mtrx.append(vctr.check())
if mtrx: yield mtrx.check()
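# Illustrative sketch with a hypothetical query (not part of the original API): parse() yields one Matrix per ';;' block;
# each Matrix holds Vectors (';'-separated), each Vector holds Limits prepended so that list index == axis (0=VAL .. 3=TBL).
DEMO_PARSE = list(parse('movies * actor "Mark Hamill" ; movie * ;;'))
# DEMO_PARSE holds 1 Matrix of 2 Vectors; the first Vector has 4 Limits, the second has 2 until Meme.check() pads it.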
class Meme(Olist):
opr: Token = TOK_SEP_MTRX
results: List[List[List[Data]]]
bindings: Dict[str, Tuple[Axis, Axis, Axis]]
src: Memelang
def __init__(self, src: Memelang):
self.src = src
self.bindings = {}
super().__init__(*parse(src))
def store(self):
for mtrx_axis, mtrx in enumerate(self):
for vctr_axis, vctr in enumerate(mtrx):
for limit_axis, limit in enumerate(vctr):
if not limit.unitary: raise SyntaxError('E_LIMIT_UNIT')
if limit.dump() == LIMIT_EQL_VSAME.dump():
if limit_axis == 0: raise SyntaxError('E_VSAME_ZERO')
self.results[mtrx_axis][vctr_axis][limit_axis].extend(self.results[mtrx_axis][vctr_axis-1][limit_axis])
else: self.results[mtrx_axis][vctr_axis][limit_axis].extend(limit[1])
def check(self) -> 'Meme':
for mtrx_axis, mtrx in enumerate(self):
if not isinstance(mtrx, Matrix): raise TypeError('E_TYPE_VCTR')
for vctr_axis, vctr in enumerate(mtrx):
if not isinstance(vctr, Vector): raise TypeError('E_TYPE_VCTR')
for limit_axis, limit in enumerate(vctr):
if not isinstance(limit, Limit): raise TypeError('E_TYPE_LIMIT')
if limit[0].kind=='VAR': self.bindings[limit[0].lexeme] = (mtrx_axis, vctr_axis, limit_axis)
self[mtrx_axis].pad(LIMIT_EQL_VSAME)
self.results = [[[[] for limit in vctr] for vctr in mtrx] for mtrx in self]
return self
def expand(self, data: Data, from_limit_axis: Axis, from_vctr_axis: Axis, from_mtrx_axis: Axis) -> Data:
expansion=Data()
for tok in data:
if tok.kind == 'VSAME':
if from_vctr_axis < 1: raise SyntaxError('E_VSAME_OOB')
expansion.extend(self.results[from_mtrx_axis][from_vctr_axis-1][from_limit_axis])
elif tok.kind == 'MSAME':
if from_mtrx_axis < 1: raise SyntaxError('E_MSAME_OOB')
expansion.extend(self.results[from_mtrx_axis-1][-1][from_limit_axis])
elif tok.kind == 'VAR':
if tok.lexeme not in self.bindings: raise SyntaxError('E_VAR_BIND')
axes = self.bindings[tok.lexeme]
expansion.extend(self.results[axes[0]][axes[1]][axes[2]])
else: expansion.append(tok)
if len(expansion)>1: expansion.opr = TOK_SEP_DATA
return expansion.check()
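# Illustrative sketch with a hypothetical query (not part of the original API): Meme.check() pads short Vectors
# with '@' (VSAME) on the high axes and records variable bindings as (mtrx_axis, vctr_axis, limit_axis).
DEMO_MEME = Meme('movies $r=* actor "Mark Hamill" ;;').check()
# DEMO_MEME.bindings == {'$r': (0, 0, 2)}, i.e. $r is bound on the ROW axis of the first vector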
def desugar(limit: Limit) -> Limit:
if limit[1][0].kind == 'VDIFF':
if limit.opr.kind != 'EQL': raise SyntaxError('E_OPR_VDIFF')
return Limit(limit[0], DATA_VSAME, opr=TOK_NOT)
if limit[1][0].kind == 'WILD':
if limit.opr.kind == 'EQL': opr=TOK_NOT
elif limit.opr.kind == 'NOT': opr=TOK_EQL
else: opr = TOK_GT # WILD MATCHES ANY NUMERIC
return Limit(limit[0], DATA_EMPTY, opr=opr)
return limit
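# Illustrative sketch with a hypothetical limit (not part of the original API): '~' desugars to '!' against the
# previous vector's value on the same axis, and a bare '*' desugars to '!_' (not-empty, i.e. match anything).
DEMO_DESUGAR = desugar(Limit(TOK_NOVAR, Data(Token('VDIFF', VDIFF)), opr=TOK_EQL))
# DEMO_DESUGAR.opr is TOK_NOT and DEMO_DESUGAR[1] is DATA_VSAME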
def intersect(query: Limit, store: Data) -> Data:
if not store: store=DATA_EMPTY
query = desugar(query)
opr_kind, intersection, query_data = query.opr.kind, Data(), [t.datum for t in query[1]]
if opr_kind == 'EQL': intersection.extend([t for t in store if t.datum in query_data])
elif opr_kind == 'NOT': intersection.extend([t for t in store if t.datum not in query_data])
# RETURN ANY NUMERIC FOR GT EMPTY
elif EMPTY in query_data: intersection.extend([t for t in store if t.kind in {'INT','FLOAT'}])
	elif len(query_data)!=1 or not isinstance(query_data[0], (int,float)): raise TypeError('E_INTER_NUM')
	else: intersection.extend([t for t in store if t.kind in {'INT','FLOAT'} and OPR_DICT[opr_kind](t.datum, query_data[0])])
return intersection
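# Illustrative sketch with a hypothetical store (not part of the original API): intersect() filters stored tokens
# by a query Limit; the numeric operators (>, >=, <, <=) only ever match INT/FLOAT tokens.
DEMO_STORE = Data(Token('INT', '5'), Token('ALNUM', 'abc'), Token('FLOAT', '3.5'))
DEMO_HITS = intersect(Limit(TOK_NOVAR, Data(Token('INT', '4')), opr=TOK_GT), DEMO_STORE)
# DEMO_HITS keeps only the INT token 5, since '>4' excludes 'abc' (non-numeric) and 3.5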
# BELOW ARE DEMO FUNCTIONS FOR LLM TRAINING
def rand_datum(kind:str, i:int=1) -> Memelang:
if kind=='ALNUM': return ''.join(random.choice('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ') for _ in range(i))
if kind=='QUOTE': return json.dumps(''.join(random.choice(' -_+,./<>[]{}\'"!@#$%^&*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ') for _ in range(i)))
if kind=='INT': return str(random.randint(-i, i))
if kind=='FLOAT': return str(random.uniform(-i, i))
if kind=='VAR': return SIGIL + rand_datum('ALNUM', i)
def demo_generate() -> Memelang:
bound_vars, vector = [], []
limit_len = random.randint(2, 10)
for _ in range(limit_len):
var = ''
do_assign_variable = random.randint(0, 1)
if do_assign_variable: var += rand_datum('VAR',3)
opr = random.choice(['=','!','>','<','<=','>='])
data = ''
if opr in {'=','!'}:
data_list_len = random.randint(1, 5)
data_list: List[Any] = []
for _ in range(data_list_len):
datum_type = random.randint(1, 10)
if datum_type == 1: data_list.append(rand_datum('QUOTE',10))
elif datum_type == 2: data_list.append(rand_datum('INT', 100))
elif datum_type == 3: data_list.append(rand_datum('FLOAT', 100))
elif datum_type == 4 and bound_vars: data_list.append(random.choice(bound_vars))
elif datum_type == 5 and vector: data_list.append(VSAME)
elif datum_type == 6 and vector and opr == '=' and data_list_len == 1: data_list.append(VDIFF)
else: data_list.append(rand_datum('ALNUM', 5))
data += SEP_DATA.join(data_list)
else:
data = str(random.uniform(-100, 100))
if var:
assert opr
bound_vars.append(var)
elif not var and opr == '=': opr = '' # ELIDED '='
vector.append(var + opr + data)
return SEP_VCTR.join(vector) + SEP_MTRX
def translate_table_output(sql_output: str) -> Memelang:
lines=[l for l in sql_output.splitlines() if l.startswith('|')]
if not lines:return ''
header=[c.strip() for c in lines[0].strip('|').split('|')]
mtrxs=[]
for line in lines[1:]:
cells=[c.strip() for c in line.strip('|').split('|')]
if len(cells)!=len(header):continue
id_val=cells[0]
parts=[f'{header[i]} {cells[i]}' for i in range(1,len(header))]
mtrxs.append(f'$rowid={id_val} ' + SEP_VCTR_PRETTY.join(parts))
return SEP_MTRX_PRETTY.join(mtrxs)
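# Illustrative sketch with a hypothetical table (not part of the original API): each data row becomes one matrix
# keyed by $rowid, with the remaining columns as ';'-separated limits.
DEMO_TABLE = '| id | rating | movie |\n| 9 | 5 | Avatar |\n| 10 | 4 | Dune |'
DEMO_TABLE_MEMELANG = translate_table_output(DEMO_TABLE)
# '$rowid=9 rating 5 ; movie Avatar ;;\n$rowid=10 rating 4 ; movie Dune'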
def translate_table_insert(sql_insert: SQL) -> Memelang:
m = re.search(r'INSERT\s+INTO\s+(\w+)\s*\((.*?)\)\s*VALUES\s*(.*);', sql_insert, re.I | re.S)
if not m: return ''
table = m.group(1)
header = [h.strip() for h in m.group(2).split(',')]
rows_sql = re.findall(r'\(([^()]*)\)', m.group(3))
mtrxs = []
for idx, row in enumerate(rows_sql):
cells = [c.strip(" '\"") for c in re.findall(r"'[^']*'|[^,]+", row)]
if len(cells) != len(header): continue
rowid = cells[0]
col_tokens = header[1:] if idx == 0 else [MSAME] * (len(header) - 1)
parts = [f'{col_tokens[i]} {cells[i + 1]}' for i in range(len(col_tokens))]
mtrxs.append(f'{table} $rowid={rowid} ' + SEP_VCTR_PRETTY.join(parts))
return SEP_MTRX_PRETTY.join(mtrxs)
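# Illustrative sketch with a hypothetical INSERT (not part of the original API): the first row names its columns,
# later rows reuse them via '^' (MSAME).
DEMO_INSERT = "INSERT INTO movies (id, movie, rating) VALUES (1, 'Avatar', 5), (2, 'Dune', 4);"
DEMO_INSERT_MEMELANG = translate_table_insert(DEMO_INSERT)
# 'movies $rowid=1 movie Avatar ; rating 5 ;;\nmovies $rowid=2 ^ Dune ; ^ 4'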
def sql_escape(token: Token, prev_alias: str, prev_col: str) -> SQL:
if token.kind == 'VSAME':
if not prev_alias or not prev_col: raise SyntaxError('E_SAME_PREV')
return f'{prev_alias}.{prev_col}'
return "'" + str(token.datum).replace("'", "''") + "'" if isinstance(token.datum, str) else str(token.datum)
def sql_compare(curr_alias: str, curr_col: str, limit: Limit, prev_alias: str, prev_col: str) -> SQL:
if len(limit[1]) > 1:
if limit.opr.kind == 'EQL': sym = 'IN'
elif limit.opr.kind == 'NOT': sym = 'NOT IN'
else: raise SyntaxError()
		return f'{curr_alias}.{curr_col} {sym} ({", ".join(sql_escape(v, prev_alias, prev_col) for v in limit[1])})'
sym = {'EQL':'=','NOT':'!=','GT':'>','GE':'>=','LT':'<','LE':'<='}[limit.opr.kind]
return f'{curr_alias}.{curr_col} {sym} {sql_escape(limit[1][0], prev_alias, prev_col)}'
def translate_matrix_table(mtrx: Matrix, primary_col: str = 'id') -> SQL:
comma = ', '
froms, wheres, selects = [], [], []
prev_table, prev_alias, prev_row, prev_col, alias_idx = None, None, None, None, 0
for vctr in mtrx:
curr = [None, None, None, None]
for i in (VAL, ROW, COL, TBL):
if not vctr[i]: raise ValueError()
curr[i]=desugar(vctr[i])
if curr[i][1][0].kind == 'VSAME':
if not prev_alias: raise SyntaxError(f'E_FIRST_{i}')
curr_alias = prev_alias
curr_row = prev_row
same_row = curr[ROW].opr.kind == 'EQL' and (curr[ROW][1][0].kind == 'VSAME' or (curr[ROW][1][0].kind in {'INT', 'FLOAT', 'ALNUM'} and curr[ROW][1][0].datum == prev_row))
# TABLE
curr_table = prev_table if curr[TBL].opr.kind=='EQL' and curr[TBL][1][0].kind == 'VSAME' else curr[TBL][1][0].lexeme
if prev_table != curr_table or not same_row:
curr_alias = f't{alias_idx}'
froms.append(f'{curr_table} AS {curr_alias}')
prev_table = curr_table
alias_idx += 1
# PRIMARY KEY
if not same_row:
wheres.append(sql_compare(curr_alias, primary_col, curr[ROW], prev_alias, primary_col))
if curr[ROW].opr.kind=='EQL' and curr[ROW][1][0].kind in {'INT', 'FLOAT', 'ALNUM'}: curr_row = curr[ROW][1][0].datum
else: curr_row = None
# COLUMN
if curr[COL].opr.kind != 'EQL' or curr[COL][1][0].kind == 'VSAME':
if curr[TBL].opr.kind == 'EQL': curr_col = prev_col
else: raise SyntaxError('E_UNSUPPORTED')
else: curr_col = curr[COL][1][0].lexeme
# VALUE
if curr[VAL][1][0].kind != 'EMPTY': wheres.append(sql_compare(curr_alias, curr_col, curr[VAL], prev_alias, prev_col))
selects.append(f'{curr_alias}.{curr_col}')
prev_alias, prev_row, prev_col = curr_alias, curr_row, curr_col
return 'SELECT '+ comma.join(list(dict.fromkeys(selects))) + ' FROM ' + comma.join(froms) + ' WHERE ' + ' AND '.join(wheres)
def translate_meme_table(meme: Meme, primary_col: str = 'id') -> SQL:
return ' UNION '.join(translate_matrix_table(mtrx, primary_col) for mtrx in meme)
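# Illustrative sketch with a hypothetical query (not part of the original API): end-to-end Memelang -> SQL.
# Meme.check() must run first so short vectors are padded out to all four axes before translation.
DEMO_SQL = translate_meme_table(Meme('movies * actor "Mark Hamill" ; movie * ;;').check())
# DEMO_SQL selects t0.actor and t0.movie FROM movies AS t0 WHERE t0.actor = 'Mark Hamill' (plus a wild-row filter)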