A DSL for data science with a syntax similar to a natural language.
pip install nldsl==0.15.4
NLDSL is a tool to create domain specific languages (DSLs) for data science, which can be translated into executable code. A new DSL is created by deriving from the CodeGenerator class and rules are added to it via simple python functions. Besides providing code generation NLDSL allows the user to define DSL-level function, which are then treated as first-class rules. Currently we provide extensions for Pandas and PySpark.
The complete documentation as well as a tutorial can be found on the project web page.
Open a terminal and run:
pip install nldsl
from nldsl import PandasCodeGenerator, grammar
code_gen = PandasCodeGenerator()
dsl_stmt = "## x = on df | rename columns old1 to new1, old2 to new2"
python_code = code_gen(dsl_stmt)
new_rule = "#$ my rule $x = rename columns old to $x | drop duplicates"
dsl_stmt = "## x = on df | my rule new"
python_code = code_gen(new_rule + dsl_stmt)
def rename_columns(code, args):
return code + ".rename(columns={})".format({args[i]:args[i+2] for i in range(0, len(args), 3)})
@grammar
def rename_columns(code, args):
""" Desc...
Grammar:
rename columns $my_var[$old to $new]
"""
# The 'Grammar' section in the docstring of this function is mandatory.
return code + ".rename(columns={})".format(dict(args["my_var"]))
code_gen["rename columns"] = rename_columns
PandasCodeGenerator.register_function("rename colummns", rename_columns)
from nldsl import CodeGenerator
class MyCodeGenerator(CodeGenerator):
# optionally you can specify an __init__, extract_environment and postprocessing method.
pass
from nldsl.core import ExpressionRule
class MyExpressionRule(ExpressionRule):
def __init__(self, expr_name, next_keyword):
super().__init__(expr_name, next_keyword)
# The symbol of an expression is modified by specifing it in the operator_map.
# Note that spaces need to be added explicitly.
self.operator_map["and"] = " & "
self.operator_map["not"] = "~"
# You can also change the type of an operator,
# e.g. from binary to being a member function of the left argument.
self.operator_map["in"] = ".isin"
self.operator_type["in"] = OperatorType.UNARY_FUNCTION
# Tell the grammar rule 'my_rule' to use MyExpressionRule instead of the default.
@grammar(expr=MyExpressionRule)
def my_rule(code, args):
# ...
class CodeGenA(CodeGenerator): pass
CodeGenA.register_function("my_rule_a", my_rule_a)
class CodeGenB(CodeGenerator): pass
CodeGenB.register_function("my_rule_b", my_rule_b)
# Combine the rules in CodeGenA und CodeGenB
class CodeGenC(A, B): pass
# Specialize a rule from CodeGenA
class CodeGenD(A): pass
CodeGenD.register_function("my_rule_a", new_rule_a)
class MyCodeGen(CodeGenerator): pass
MyCodeGen.init_meta_model(eval_prefix="//#", define_prefix="//$")
Tested and supported for Python 3.4+