[ACCEPTED]-asn.1 parser in C/Python-asn.1
I wrote such a parser a few years ago. It generates Python classes for the pyasn1 library. I used it on an Ericsson document to make a parser for their CDRs.
I'll try posting the code here now.
import sys
from pyparsing import *
# Bracket tokens used by Enclose(). Either "(" or "{" opens and either ")" or
# "}" closes; the two patterns are independent of each other, so the opening
# and closing characters are not required to match. Both are suppressed.
OpenBracket = Regex("[({]").suppress()
CloseBracket = Regex("[)}]").suppress()
def Enclose(val):
    """Return a parser for *val* surrounded by an opening and a closing bracket.

    Args:
        val: The pyparsing element expected between the brackets.

    Returns:
        The sequence ``OpenBracket + val + CloseBracket``.
    """
    return OpenBracket + val + CloseBracket
def SetDefType(typekw):
    """Build a parse action that tags the matched tokens with *typekw*.

    Args:
        typekw: Value to store under the ``"defType"`` key of the tokens.

    Returns:
        A three-argument callable. It writes ``typekw`` into its third
        argument under ``"defType"`` and returns ``None``.
    """
    def tag_tokens(_source, _location, tokens):
        # Only the tokens argument is used; the first two are accepted so the
        # callable keeps the three-argument shape of the original action.
        tokens["defType"] = typekw

    return tag_tokens
def NoDashes(a, b, c):
    """Parse action: return the first token with every "-" replaced by "_".

    Args:
        a: Unused (first parse-action argument).
        b: Unused (second parse-action argument).
        c: Token sequence; only ``c[0]`` is read.

    Returns:
        ``c[0]`` with dashes turned into underscores.
    """
    first_token = c[0]
    return first_token.replace("-", "_")
def DefineTypeDef(typekw, typename, typedef):
    """Build the parser for one kind of type body.

    Args:
        typekw: Tag stored as ``"defType"`` on a match (see ``SetDefType``).
        typename: Element matching the type's keyword or name; its match is
            exposed under the results name ``"definitionType"``.
        typedef: Element describing what may appear inside the optional
            bracketed part; its match is exposed as ``"definition"``.

    Returns:
        ``typename`` followed by an optional bracketed ``typedef``.
    """
    # addParseAction is invoked on the element passed in as *typename*.
    tagged_name = typename.addParseAction(SetDefType(typekw)).setResultsName("definitionType")
    bracketed_body = Optional(Enclose(typedef).setResultsName("definition"))
    return tagged_name - bracketed_body
# --- Grammar -----------------------------------------------------------------
# Size constraint body: a numeric minimum, optionally followed by ".." and a
# maximum made of digits and/or the letter "n".
SizeConstraintBodyOpt = Word(nums).setResultsName("minSize") - \
Optional(Suppress(Literal("..")) - Word(nums + "n").setResultsName("maxSize"))
# The keyword SIZE followed by the bracketed body above, grouped and exposed
# under the results name "sizeConstraint".
SizeConstraint = Group(Keyword("SIZE").suppress() - Enclose(SizeConstraintBodyOpt)).setResultsName("sizeConstraint")
# A delimited list of size constraints, exposed as "constraints".
Constraints = Group(delimitedList(SizeConstraint)).setResultsName("constraints")
# Forward declaration: the type bodies below refer back to DefinitionBody, and
# its alternatives are attached with "<<" once they are all defined.
DefinitionBody = Forward()
# Bracketed numeric tag followed by the keyword IMPLICIT.
TagPrefix = Enclose(Word(nums).setResultsName("tagID")) - Keyword("IMPLICIT").setResultsName("tagFormat")
# Optional trailing OPTIONAL keyword, exposed as "isOptional" when present.
OptionalSuffix = Optional(Keyword("OPTIONAL").setResultsName("isOptional"))
# Optional literal "--F--" marker in front of an element; discarded.
JunkPrefix = Optional("--F--").suppress()
# Identifier of alphanumerics and dashes; NoDashes turns dashes into underscores.
AName = Word(alphanums + "-").setParseAction(NoDashes).setResultsName("name")
# One member of a SET/SEQUENCE/CHOICE: [--F--] name [tag] type [OPTIONAL].
SingleElement = Group(JunkPrefix - AName - Optional(TagPrefix) - DefinitionBody.setResultsName("typedef") - OptionalSuffix)
NamedTypes = Dict(delimitedList(SingleElement)).setResultsName("namedTypes")
# SET / SEQUENCE / CHOICE, each with an optional bracketed member list.
SetBody = DefineTypeDef("Set", Keyword("SET"), NamedTypes)
SequenceBody = DefineTypeDef("Sequence", Keyword("SEQUENCE"), NamedTypes)
ChoiceBody = DefineTypeDef("Choice", Keyword("CHOICE"), NamedTypes)
# SET [SIZE(...)] OF <type> and SEQUENCE [SIZE(...)] OF <type>; the element
# type is grouped and exposed as "typedef".
SetOfBody = (Keyword("SET") + Optional(SizeConstraint) + Keyword("OF")).setParseAction(SetDefType("SetOf")) + Group(DefinitionBody).setResultsName("typedef")
SequenceOfBody = (Keyword("SEQUENCE") + Optional(SizeConstraint) + Keyword("OF")).setParseAction(SetDefType("SequenceOf")) + Group(DefinitionBody).setResultsName("typedef")
# Any other identifier is treated as a reference to another type
# (defType "constructed"), optionally followed by bracketed constraints.
CustomBody = DefineTypeDef("constructed", Word(alphanums + "-").setParseAction(NoDashes), Constraints)
NullBody = DefineTypeDef("Null", Keyword("NULL"), Constraints)
# NOTE: the pattern requires exactly one space between OCTET and STRING.
OctetStringBody = DefineTypeDef("OctetString", Regex("OCTET STRING"), Constraints)
# NOTE(review): this matches the all-caps spelling "IA5STRING" only - confirm
# that this is how the input documents spell the type.
IA5StringBody = DefineTypeDef("IA5String", Keyword("IA5STRING"), Constraints)
# Enumeration member: a name of printable characters followed by a bracketed number.
EnumElement = Group(Word(printables).setResultsName("name") - Enclose(Word(nums).setResultsName("value")))
NamedValues = Dict(delimitedList(EnumElement)).setResultsName("namedValues")
EnumBody = DefineTypeDef("Enum", Keyword("ENUMERATED"), NamedValues)
BitStringBody = DefineTypeDef("BitString", Keyword("BIT") + Keyword("STRING"), NamedValues)
# Alternatives are listed with the "... OF" forms ahead of the plain SET /
# SEQUENCE forms and with the catch-all CustomBody last.
# NOTE(review): "|" is expected to try alternatives in this order - keep the
# ordering when adding new bodies.
DefinitionBody << (OctetStringBody | SetOfBody | SetBody | ChoiceBody | SequenceOfBody | SequenceBody | EnumBody | BitStringBody | IA5StringBody | NullBody | CustomBody)
# A top-level definition: Name ::= [tag] type
Definition = AName - Literal("::=").suppress() - Optional(TagPrefix) - DefinitionBody
Definitions = Dict(ZeroOrMore(Group(Definition)))
# Parse the file named by the first command-line argument.
pf = Definitions.parseFile(sys.argv[1])
# TypeDeps: type name -> list of type names it depends on (filled in by GenerateClass).
# TypeDefs: type name -> generated class source text (filled in by the driver loop).
TypeDeps = {}
TypeDefs = {}
def SizeConstraintHelper(size):
    """Render a parsed size constraint as a ``ValueSizeConstraint`` expression.

    Args:
        size: Mapping-like parse result providing ``get``. ``"minSize"`` is
            the lower bound; ``"maxSize"`` is the upper bound and defaults to
            the lower bound when absent.

    Returns:
        The text ``"constraint.ValueSizeConstraint(<min>, <max>)"``, or
        ``None`` when a bound is not a decimal integer (the grammar also
        admits the letter "n" in the upper bound).
    """
    lower = size.get("minSize")
    upper = size.get("maxSize", lower)
    try:
        return "constraint.ValueSizeConstraint(%s, %s)" % (int(lower), int(upper))
    except ValueError:
        # Non-numeric bound: no constraint text can be produced for it.
        return None
# Dispatch table: constraint results name -> function rendering that constraint.
ConstraintMap = dict(sizeConstraint=SizeConstraintHelper)
def ConstraintHelper(c):
    """Render every constraint in *c* and return the non-empty results.

    Args:
        c: Mapping-like object providing ``items()``; each key selects a
            renderer from ``ConstraintMap`` and each value is passed to it.

    Returns:
        List of rendered constraint strings, in iteration order. Renderers
        that return a falsy value contribute nothing.

    Raises:
        KeyError: If a key has no entry in ``ConstraintMap``.
    """
    rendered = (ConstraintMap[key](value) for key, value in c.items())
    return [text for text in rendered if text]
def GenerateConstraints(c, ancestor, element, level=1):
    """Produce the ``subtypeSpec`` assignment for a set of constraints.

    Args:
        c: Constraints, in the form accepted by ``ConstraintHelper``.
        ancestor: Name of the base type whose ``subtypeSpec`` is extended.
        element: Unused here; accepted so all ``defmap`` handlers share one signature.
        level: Unused here; accepted for the same reason.

    Returns:
        A one-element list holding the (unindented) assignment line, or an
        empty list when no constraint could be rendered.
    """
    parts = ConstraintHelper(c)
    if not parts:
        return []
    terms = ["%s.subtypeSpec" % ancestor] + parts
    return ["subtypeSpec = %s" % " + ".join(terms)]
def GenerateNamedValues(definitions, ancestor, element, level=1):
    """Produce the ``namedValues`` assignment for an enumeration-like type.

    Args:
        definitions: Iterable of entries indexable by ``"name"`` and ``"value"``.
        ancestor: Unused here; accepted so all ``defmap`` handlers share one signature.
        element: Unused here; accepted for the same reason.
        level: Unused here; accepted for the same reason.

    Returns:
        List of source lines: the opening line, one ``('name', value),`` line
        per entry, and the closing parenthesis.
    """
    lines = ["namedValues = namedval.NamedValues("]
    lines.extend(" ('%s', %s)," % (kw["name"], kw["value"]) for kw in definitions)
    lines.append(")")
    return lines
# Prefix inserted before "NamedType": "Optional" for optional members, nothing otherwise.
OptMap = {True: "Optional", False: ""}
def GenerateNamedTypesList(definitions, element, level=1):
    """Produce one ``namedtype.[Optional]NamedType(...)`` entry per member.

    NOTE(review): the nesting below was reconstructed from a copy of this
    code whose indentation had been stripped. In particular the inline
    ``definition`` handling is placed inside the "Choice" branch - confirm
    against a known-good copy if one is available.

    Args:
        definitions: Iterable of parsed members. Each is read through
            ``val["name"]``, ``val["defType"]`` and ``val.get(...)`` for
            ``"isOptional"``, ``"constraints"``, ``"tagID"`` and ``"definition"``.
        element: The enclosing top-level definition; names of referenced
            types are appended to ``element["_d"]``.
        level: Number of spaces put in front of single-line entries.

    Returns:
        List of source lines.
    """
    result = []
    for val in definitions:
        name = val["name"]
        isOptional = bool(val.get("isOptional"))

        # Keyword arguments for an optional ".subtype(...)" suffix.
        subtype_args = []
        constraints = val.get("constraints")
        if constraints:
            rendered = ConstraintHelper(constraints)
            # Skip the argument when nothing could be rendered; an empty
            # "subtypeSpec=" would not be valid in the generated code.
            if rendered:
                subtype_args.append("subtypeSpec=%s" % " + ".join(rendered))
        tagId = val.get("tagID")
        if tagId:
            subtype_args.append("implicitTag=tag.Tag(tag.tagClassContext, tag.tagFormatConstructed, %s)" % tagId)
        if subtype_args:
            subtype = ".subtype(%s)" % ", ".join(subtype_args)
        else:
            subtype = ""

        # NOTE(review): only the four defTypes below are mapped; for any other
        # defType typename stays None and "None(" is emitted.
        typename = None
        cbody = []
        defType = val["defType"]
        if defType == "constructed":
            typename = val["typedef"]
            element["_d"].append(typename)
        elif defType == "Null":
            typename = "univ.Null"
        elif defType == "SequenceOf":
            typename = "univ.SequenceOf"
            # Diagnostic dump, sent to stderr so that it cannot end up in the
            # generated module, which is written to stdout.
            sys.stderr.write("%s\n" % (val.items(),))
            cbody = [" componentType=%s()" % val["typedef"]["definitionType"]]
        elif defType == "Choice":
            typename = "univ.Choice"
            indef = val.get("definition")
            if indef:
                cbody = [" %s" % x for x in GenerateClassDefinition(indef, name, typename, element)]

        opener = "namedtype.%sNamedType('%s', %s(" % (OptMap[isOptional], name, typename)
        closer = ")%s)," % subtype
        if cbody:
            result.append(" %s" % opener)
            result.extend(cbody)
            result.append(" %s" % closer)
        else:
            result.append("%s%s%s" % (" " * level, opener, closer))
    return result
def GenerateNamedTypes(definitions, ancestor, element, level=1):
    """Produce the ``componentType`` assignment listing all members.

    Args:
        definitions: Parsed members, passed to ``GenerateNamedTypesList``.
        ancestor: Unused here; accepted so all ``defmap`` handlers share one signature.
        element: The enclosing top-level definition, passed through.
        level: Unused here (it is not forwarded to ``GenerateNamedTypesList``).

    Returns:
        List of source lines: opening line, member entries, closing parenthesis.
    """
    members = GenerateNamedTypesList(definitions, element)
    return ["componentType = namedtype.NamedTypes("] + members + [")"]
# Dispatch table: results name found inside a "definition" -> generator for it.
defmap = dict(
    constraints=GenerateConstraints,
    namedValues=GenerateNamedValues,
    namedTypes=GenerateNamedTypes,
)
def GenerateClassDefinition(definition, name, ancestor, element, level=1):
    """Generate the body lines for one parsed ``definition``.

    Args:
        definition: Mapping-like object providing ``items()``; each non-empty
            value whose key appears in ``defmap`` is handed to that generator.
        name: Unused here; kept as part of the established signature.
        ancestor: Base type name, forwarded to the generators.
        element: The enclosing top-level definition, forwarded to the generators.
        level: Forwarded to the generators.

    Returns:
        All generated lines, each prefixed with a single space.
    """
    lines = []
    for defkey, defval in definition.items():
        if not defval:
            continue
        fn = defmap.get(defkey)
        if not fn:
            # No generator is registered for this key; ignore it.
            continue
        lines.extend(fn(defval, ancestor, element, level))
    return [" %s" % x for x in lines]
def GenerateClass(element, ancestor):
    """Generate the source text of one class and record its dependencies.

    NOTE(review): the nesting below was reconstructed from a copy of this
    code whose indentation had been stripped; the ``sizeConstraint`` handling
    is placed in the branch taken when there is no ``definition``.

    Args:
        element: Parsed top-level definition. Read through ``element["name"]``,
            ``element["_d"]`` and ``element.get(...)`` for ``"definition"``,
            ``"typedef"`` and ``"sizeConstraint"``.
        ancestor: Name of the base class to derive from.

    Returns:
        The class source as a single newline-joined string.

    Side effects:
        Appends to ``element["_d"]`` and stores the de-duplicated dependency
        list in ``TypeDeps[name]``.
    """
    name = element["name"]
    top = "class %s(%s):" % (name, ancestor)
    definition = element.get("definition")
    body = []
    if definition:
        body = GenerateClassDefinition(definition, name, ancestor, element)
    else:
        typedef = element.get("typedef")
        if typedef:
            element["_d"].append(typedef["definitionType"])
            body.append(" componentType = %s()" % typedef["definitionType"])
        szc = element.get('sizeConstraint')
        if szc:
            # GenerateConstraints returns unindented lines; indent them like
            # the other body lines so the assignment stays inside the class
            # instead of becoming a module-level statement after it.
            body.extend(
                " %s" % line
                for line in GenerateConstraints({'sizeConstraint': szc}, ancestor, element)
            )
    if not body:
        body.append(" pass")
    TypeDeps[name] = list(frozenset(element["_d"]))
    return "\n".join([top] + body)
# defType tag -> name of the pyasn1 base class used in the generated code.
StaticMap = dict(
    Null="univ.Null",
    Enum="univ.Enumerated",
    OctetString="univ.OctetString",
    IA5String="char.IA5String",
    Set="univ.Set",
    Sequence="univ.Sequence",
    Choice="univ.Choice",
    SetOf="univ.SetOf",
    BitString="univ.BitString",
    SequenceOf="univ.SequenceOf",
)
def StaticConstructor(x):
    """Pick the base class for a parsed definition and generate its class text.

    Args:
        x: Parsed top-level definition. ``x["_d"]`` is reset to an empty
            dependency list before generation.

    Returns:
        The class source text produced by ``GenerateClass``.

    Raises:
        KeyError: If ``x["defType"]`` is neither "constructed" nor a key of
            ``StaticMap``.
    """
    x["_d"] = []
    defType = x["defType"]
    if defType != "constructed":
        return GenerateClass(x, StaticMap[defType])
    # A "constructed" type derives from another named type, which therefore
    # has to be emitted first: record it as a dependency.
    base = x["definitionType"]
    x["_d"].append(base)
    return GenerateClass(x, base)
# Generate the class text for every parsed definition. GenerateClass records
# each type's dependencies in TypeDeps as a side effect.
for element in pf:
    TypeDefs[element["name"]] = StaticConstructor(element)

# Emit the classes in dependency order: repeatedly print every type whose
# dependency list is empty and strike it from the remaining lists.
while TypeDefs:
    ready = [k for k, v in TypeDeps.items() if not v]
    if not ready:
        # Nothing can be emitted: report the names that are referenced but
        # never defined.
        referenced = set()
        for deps in TypeDeps.values():
            referenced.update(deps)
        missing = sorted(referenced - set(TypeDeps))
        # Diagnostic dump goes to stderr so it cannot end up in the generated
        # module, which is written to stdout.
        sys.stderr.write("%r\n" % (TypeDefs,))
        raise ValueError(missing)
    for t in ready:
        for deps in TypeDeps.values():
            if t in deps:
                deps.remove(t)
        del TypeDeps[t]
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(TypeDefs[t])
        print("")
        print("")
        del TypeDefs[t]
This will take a file with syntax similar to this one:
CarrierInfo ::= OCTET STRING (SIZE(2..3))
ChargeAreaCode ::= OCTET STRING (SIZE(3))
ChargeInformation ::= OCTET STRING (SIZE(2..33))
ChargedParty ::= ENUMERATED
(chargingOfCallingSubscriber (0),
chargingOfCalledSubscriber (1),
noCharging (2))
ChargingOrigin ::= OCTET STRING (SIZE(1))
Counter ::= OCTET STRING (SIZE(1..4))
Date ::= OCTET STRING (SIZE(3..4))
You will need to add this line at the top of the generated file:
from pyasn1.type import univ, namedtype, namedval, constraint, tag, char
And name the result defs.py. Then I attached a bunch of pretty-printers to the defs (if you don't have any, just skip this step):
import defs, parsers
def rplPrettyOut(self, value):
    """Replacement ``prettyOut``: return the ``repr`` of the decoded value.

    Args:
        self: Object providing ``decval``.
        value: Raw value, passed to ``self.decval``.

    Returns:
        ``repr(self.decval(value))``.
    """
    decoded = self.decval(value)
    return repr(decoded)
# For every public name in `parsers` that also exists in `defs`, install the
# parser as the type's `decval` and route `prettyOut` through it.
for name in dir(parsers):
    if name.startswith("_") or not hasattr(defs, name):
        continue
    target = getattr(defs, name)
    target.prettyOut = rplPrettyOut
    target.decval = getattr(parsers, name)
Then, it's down to:
def ParseBlock(self, block):
    """Yield the records decoded one after another from *block*.

    Decoding stops when the data is exhausted or when the next byte is a
    NUL byte.

    Args:
        block: Encoded data; each decode call returns the decoded record and
            the remaining data.

    Yields:
        Each decoded record.
    """
    # Compare a one-byte slice against a bytes literal: indexing a bytes
    # object on Python 3 yields an int, which never equals '\x00', so the
    # original test could not detect the NUL byte there.
    while block and block[:1] != b'\x00':
        result, block = pyasn1.codec.ber.decoder.decode(block, asn1Spec=parserimp.defs.CallDataRecord())
        yield result
If you're still interested, I'll put the code somewhere. In fact, I'll put it somewhere in any case - but if you're interested, just let me know and I'll point you there.
I'm the author of LEPL, a parser written in Python, and what you want to do is one of the things on my "TODO" list.
I will not be doing this soon, but you might consider using LEPL to construct your solution because:
1 - it's a pure Python solution (which makes life simpler)
2 - it can already parse binary data as well as text, so you would only need to use a single tool - the same parser that you would use to parse the ASN.1 spec would then be used to parse the binary data
The main downsides are that:
1 - it's a fairly new package, so it may be buggier than some, and the support community is not that large
2 - it is restricted to Python 2.6 and up (and the binary parser only works with Python 3 and up).
For more information, please see http://www.acooke.org/lepl - in particular, for binary parsing see the relevant section of the manual (I cannot link directly to that, as Stack Overflow seems to think I am spamming).
Andrew
PS The main reason this is not something I have already started is that the ASN.1 specs are not freely available, as far as I know. If you have access to them, and it is not illegal(!), a copy would be greatly appreciated (unfortunately I am currently working on another project, so this would still take time to implement, but it would help me get this working sooner...).
There is an ANTLR ASN.1 grammar; using ANTLR, you should be able to make an ASN.1 parser out of it. Generating code for pyasn1 is left as an exercise to the poster :-)
I have experience with pyasn1, and it is enough to parse quite complex grammars. A grammar is expressed as a Python structure, so there is no need to run a code generator.
I have done a similar job using asn1c and building a Pyrex extension around it. The wrapped structure is described in 3GPP TS 32.401.
With Pyrex you can write a wrapper thick enough to convert between native Python data types and the correct ASN.1 representations (wrapper generators, such as SWIG, tend not to perform complex operations on the type). The wrapper I wrote also tracked the ownership of the underlying C data structures (e.g. when accessing a sub-structure, a Python object was returned, but there was no copy of the underlying data, only reference sharing).
The wrapper was eventually written in a kind of semi-automatic way, but because that has been my only job with ASN.1, I never took the step of completely automating the code generation.
You can try to use other Python-C wrappers and perform a completely automatic conversion: the job would be less, but then you would move complexity (and repetitive, error-prone operations) to the structure users; for this reason I preferred the Pyrex way. asn1c was definitely a good choice.
I recently created the Python package called asn1tools, which compiles an ASN.1 specification into Python objects that can be used to encode and decode messages.
# Interactive session: compile an ASN.1 file, then encode a 'Question' message and decode it again.
>>> import asn1tools
>>> foo = asn1tools.compile_file('tests/files/foo.asn')
>>> encoded = foo.encode('Question', {'id': 1, 'question': 'Is 1+1=3?'})
>>> encoded
bytearray(b'0\x0e\x02\x01\x01\x16\x09Is 1+1=3?')
>>> foo.decode('Question', encoded)
{'id': 1, 'question': 'Is 1+1=3?'}
More Related questions
We use cookies to improve the performance of the site. By staying on our site, you agree to the terms of use of cookies.