/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System.Collections.Generic;
using System.Text;
using System;
namespace Avro
{
///
/// Collection of static methods for generating the cannonical form of schemas.
///
public static class SchemaNormalization
{
    /// <summary>
    /// Seed value and polynomial mask used by the 64-bit fingerprint.
    /// Its bit pattern is 0xC15D213AA4D7A795 when read as an unsigned 64-bit value.
    /// Kept as a private constant so the fingerprint computation cannot be
    /// altered by writes to the public <see cref="Empty64"/> field.
    /// </summary>
    private const long EmptyFingerprint64 = -4513414715797952619;

    /// <summary>
    /// Starting value of the 64-bit fingerprint, i.e. the fingerprint of an
    /// empty byte string. This field is publicly writable for backward
    /// compatibility only; the fingerprint methods of this class do not read it.
    /// </summary>
    public static long Empty64 = EmptyFingerprint64;

    /// <summary>
    /// Parses a schema into the canonical form as defined by Avro spec.
    /// </summary>
    /// <param name="s">Schema</param>
    /// <returns>Parsing Canonical Form of a schema as defined by Avro spec.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    public static string ToParsingForm(Schema s)
    {
        if (object.ReferenceEquals(s, null))
        {
            throw new ArgumentNullException("s");
        }

        // Maps the full name of every named schema already written to the
        // quoted name that replaces it on later occurrences.
        IDictionary<string, string> env = new Dictionary<string, string>();
        return Build(env, s, new StringBuilder()).ToString();
    }

    /// <summary>
    /// Returns a fingerprint of a string of bytes. This string is
    /// presumed to contain a canonical form of a schema. The
    /// algorithm used to compute the fingerprint is selected by the
    /// argument <paramref name="fpName"/>.
    /// <para>
    /// If <paramref name="fpName"/> equals the string <c>"CRC-64-AVRO"</c>,
    /// then the result of <see cref="Fingerprint64(byte[])"/> is
    /// returned in little-endian format.
    /// </para>
    /// <para>
    /// If <paramref name="fpName"/> equals the string <c>"MD5"</c>,
    /// then the standard MD5 algorithm is used.
    /// </para>
    /// <para>
    /// If <paramref name="fpName"/> equals the string <c>"SHA-256"</c>,
    /// then the standard SHA-256 algorithm is used.
    /// </para>
    /// <para>
    /// Otherwise, <paramref name="fpName"/> is not recognized and an
    /// <see cref="ArgumentException"/> is thrown.
    /// </para>
    /// <para>
    /// Recommended Avro practice dictates that <c>"CRC-64-AVRO"</c> is used
    /// for 64-bit fingerprints, <c>"MD5"</c> is used for 128-bit fingerprints,
    /// and <c>"SHA-256"</c> is used for 256-bit fingerprints.
    /// </para>
    /// </summary>
    /// <param name="fpName">Name of the hashing algorithm.</param>
    /// <param name="data">Data to be hashed.</param>
    /// <returns>Fingerprint</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="fpName"/> is not a supported algorithm name.</exception>
    public static byte[] Fingerprint(string fpName, byte[] data)
    {
        if (data == null)
        {
            throw new ArgumentNullException("data");
        }

        switch (fpName)
        {
            case "CRC-64-AVRO":
                long fp = Fingerprint64(data);
                byte[] result = new byte[8];
                for (int i = 0; i < result.Length; i++)
                {
                    // Emit the least significant byte first (little-endian).
                    // The truncation is intentional, so it must not trap in
                    // builds compiled with overflow checking enabled.
                    result[i] = unchecked((byte)fp);
                    fp >>= 8;
                }
                return result;
            case "MD5":
                // Hash algorithm instances are IDisposable; release them deterministically.
                using (var md5 = System.Security.Cryptography.MD5.Create())
                {
                    return md5.ComputeHash(data);
                }
            case "SHA-256":
                using (var sha256 = System.Security.Cryptography.SHA256.Create())
                {
                    return sha256.ComputeHash(data);
                }
            default:
                throw new ArgumentException(string.Format("Unsupported fingerprint computation algorithm ({0})", fpName));
        }
    }

    /// <summary>
    /// Returns <see cref="Fingerprint(string, byte[])"/> applied to the UTF-8 bytes
    /// of the parsing canonical form of the supplied schema.
    /// </summary>
    /// <param name="fpName">Name of the hashing algorithm.</param>
    /// <param name="s">Schema to be hashed.</param>
    /// <returns>Fingerprint</returns>
    public static byte[] ParsingFingerprint(string fpName, Schema s)
    {
        return Fingerprint(fpName, Encoding.UTF8.GetBytes(ToParsingForm(s)));
    }

    /// <summary>
    /// Returns <see cref="Fingerprint64(byte[])"/> applied to the UTF-8 bytes
    /// of the parsing canonical form of the supplied schema.
    /// </summary>
    /// <param name="s">Schema to be hashed.</param>
    /// <returns>Fingerprint</returns>
    public static long ParsingFingerprint64(Schema s)
    {
        return Fingerprint64(Encoding.UTF8.GetBytes(ToParsingForm(s)));
    }

    /// <summary>
    /// Computes the 64-bit Rabin Fingerprint (as recommended in the Avro spec) of a byte string.
    /// </summary>
    /// <param name="data">Data to be hashed.</param>
    /// <returns>Fingerprint</returns>
    private static long Fingerprint64(byte[] data)
    {
        long result = EmptyFingerprint64;

        // The algorithm reinterprets signed values as unsigned and truncates
        // on purpose; 'unchecked' keeps that valid under /checked builds.
        unchecked
        {
            foreach (byte b in data)
            {
                // Low 8 bits of (state XOR input byte) select the table entry.
                int index = (int)((result ^ b) & 0xff);

                // Logical (zero-filling) right shift of the state, then mix in the table entry.
                result = (long)((ulong)result >> 8) ^ Fp64.FpTable[index];
            }
        }

        return result;
    }

    /// <summary>
    /// Appends the canonical form of <paramref name="s"/> to <paramref name="o"/>,
    /// recursing into nested schemas.
    /// </summary>
    /// <param name="env">
    /// Full names of the named schemas written so far, mapped to the quoted name
    /// that is written in place of any repeated occurrence.
    /// </param>
    /// <param name="s">Schema to write.</param>
    /// <param name="o">Builder receiving the output.</param>
    /// <returns>The same builder instance, <paramref name="o"/>, to allow chaining.</returns>
    private static StringBuilder Build(IDictionary<string, string> env, Schema s, StringBuilder o)
    {
        // True until the first element of a comma separated list has been written.
        bool firstTime = true;
        Schema.Type st = s.Tag;
        switch (st)
        {
            case Schema.Type.Union:
                UnionSchema us = (UnionSchema)s;
                o.Append('[');
                foreach (Schema b in us.Schemas)
                {
                    if (!firstTime)
                    {
                        o.Append(",");
                    }
                    else
                    {
                        firstTime = false;
                    }
                    Build(env, b, o);
                }
                return o.Append(']');

            case Schema.Type.Array:
            case Schema.Type.Map:
                o.Append("{\"type\":\"").Append(Schema.GetTypeString(s.Tag)).Append("\"");
                if (st == Schema.Type.Array)
                {
                    ArraySchema arraySchema = (ArraySchema)s;
                    Build(env, arraySchema.ItemSchema, o.Append(",\"items\":"));
                }
                else
                {
                    MapSchema mapSchema = (MapSchema)s;
                    Build(env, mapSchema.ValueSchema, o.Append(",\"values\":"));
                }
                return o.Append("}");

            case Schema.Type.Enumeration:
            case Schema.Type.Fixed:
            case Schema.Type.Record:
                NamedSchema namedSchema = (NamedSchema)s;
                var name = namedSchema.Fullname;

                // A named schema is written out in full only once; every later
                // occurrence is replaced by its quoted full name.
                string knownName;
                if (env.TryGetValue(name, out knownName))
                {
                    return o.Append(knownName);
                }

                var qname = "\"" + name + "\"";

                // Register before descending so that a reference to this schema
                // from inside its own definition resolves to the name.
                env.Add(name, qname);
                o.Append("{\"name\":").Append(qname);
                o.Append(",\"type\":\"").Append(Schema.GetTypeString(s.Tag)).Append("\"");
                if (st == Schema.Type.Enumeration)
                {
                    EnumSchema enumSchema = (EnumSchema)s;
                    o.Append(",\"symbols\":[");
                    foreach (var enumSymbol in enumSchema.Symbols)
                    {
                        if (!firstTime)
                        {
                            o.Append(",");
                        }
                        else
                        {
                            firstTime = false;
                        }
                        o.Append("\"").Append(enumSymbol).Append("\"");
                    }
                    o.Append("]");
                }
                else if (st == Schema.Type.Fixed)
                {
                    FixedSchema fixedSchema = (FixedSchema)s;
                    o.Append(",\"size\":").Append(fixedSchema.Size.ToString());
                }
                else // st == Schema.Type.Record
                {
                    RecordSchema recordSchema = (RecordSchema)s;
                    o.Append(",\"fields\":[");
                    foreach (var field in recordSchema.Fields)
                    {
                        if (!firstTime)
                        {
                            o.Append(",");
                        }
                        else
                        {
                            firstTime = false;
                        }
                        o.Append("{\"name\":\"").Append(field.Name).Append("\"");
                        Build(env, field.Schema, o.Append(",\"type\":")).Append("}");
                    }
                    o.Append("]");
                }
                return o.Append("}");

            default: // boolean, bytes, double, float, int, long, null, string
                // Every tag not handled above is written as its quoted name.
                return o.Append("\"").Append(s.Name).Append("\"");
        }
    }

    /// <summary>
    /// Holds the 256-entry lookup table used by <see cref="Fingerprint64(byte[])"/>.
    /// The table is filled once, by the static constructor.
    /// </summary>
    private static class Fp64
    {
        private static readonly long[] fpTable = new long[256];

        /// <summary>
        /// Lookup table indexed by a byte value (0-255).
        /// </summary>
        public static long[] FpTable
        {
            get { return fpTable; }
        }

        static Fp64()
        {
            // Signed/unsigned reinterpretation is intentional; see Fingerprint64.
            unchecked
            {
                for (int i = 0; i < 256; i++)
                {
                    long fp = i;
                    for (int j = 0; j < 8; j++)
                    {
                        // All ones when the lowest bit is set, zero otherwise, so
                        // the constant is XOR-ed in only for a set low bit.
                        long mask = -(fp & 1L);
                        fp = ((long)(((ulong)fp) >> 1)) ^ (EmptyFingerprint64 & mask);
                    }
                    fpTable[i] = fp;
                }
            }
        }
    }
}
}