%% @author Bob Ippolito
%% @copyright 2007 Mochi Media, Inc.

%% @doc Converts HTML 4 charrefs and entities to codepoints.
-module(mochiweb_charref).
-export([charref/1, test/0]).

%% External API.

%% @spec charref(S) -> integer() | undefined
%% @doc Convert a decimal charref, hex charref, or html entity to a unicode
%%      codepoint, or return undefined on failure.
%%      The input should not include an ampersand or semicolon.
%%      A binary is accepted and converted to a list first.
%%      charref("#38") = 38, charref("#x26") = 38, charref("amp") = 38.
%%      Numeric references that carry a sign ("#-38", "#x+26") or exceed
%%      16#10FFFF are not codepoints and yield undefined.
charref(B) when is_binary(B) ->
    charref(binary_to_list(B));
charref([$#, C | L]) when C =:= $x orelse C =:= $X ->
    numeric_charref(L, 16);
charref([$# | L]) ->
    numeric_charref(L, 10);
charref(L) ->
    entity(L).

%% @spec test() -> ok
%% @doc Run tests for mochiweb_charref.
test() ->
    1234 = charref("#1234"),
    255 = charref("#xfF"),
    255 = charref("#XFf"),
    38 = charref("amp"),
    38 = charref(<<"#38">>),
    undefined = charref("not_an_entity"),
    undefined = charref("#"),
    undefined = charref("#x"),
    %% list_to_integer would accept these signed forms; charref must not.
    undefined = charref("#-38"),
    undefined = charref("#+38"),
    undefined = charref("#x-26"),
    undefined = charref("#x+26"),
    %% One past the last codepoint is rejected, the last one is accepted.
    16#10FFFF = charref("#x10FFFF"),
    undefined = charref("#x110000"),
    ok.

%% Internal API.

%% Parse the digits of a numeric character reference in the given base.
%% Returns the integer when it lies in 0..16#10FFFF, otherwise undefined.
%% A leading sign is rejected up front because list_to_integer/2 would
%% otherwise accept it and hand back a negative (or "+"-prefixed) value.
numeric_charref([Sign | _], _Base) when Sign =:= $+ orelse Sign =:= $- ->
    undefined;
numeric_charref(Digits, Base) ->
    try erlang:list_to_integer(Digits, Base) of
        N when N =< 16#10FFFF -> N;
        _TooLarge -> undefined
    catch
        %% Empty, non-numeric, or non-list input.
        error:badarg -> undefined
    end.

%% Map a named entity (given without the leading ampersand or trailing
%% semicolon) to its codepoint. Names are case-sensitive. Any term that is
%% not one of the names listed below yields undefined.
entity(Name) ->
    case Name of
        %% Codepoints 160-255.
        "nbsp" -> 160;
        "iexcl" -> 161;
        "cent" -> 162;
        "pound" -> 163;
        "curren" -> 164;
        "yen" -> 165;
        "brvbar" -> 166;
        "sect" -> 167;
        "uml" -> 168;
        "copy" -> 169;
        "ordf" -> 170;
        "laquo" -> 171;
        "not" -> 172;
        "shy" -> 173;
        "reg" -> 174;
        "macr" -> 175;
        "deg" -> 176;
        "plusmn" -> 177;
        "sup2" -> 178;
        "sup3" -> 179;
        "acute" -> 180;
        "micro" -> 181;
        "para" -> 182;
        "middot" -> 183;
        "cedil" -> 184;
        "sup1" -> 185;
        "ordm" -> 186;
        "raquo" -> 187;
        "frac14" -> 188;
        "frac12" -> 189;
        "frac34" -> 190;
        "iquest" -> 191;
        "Agrave" -> 192;
        "Aacute" -> 193;
        "Acirc" -> 194;
        "Atilde" -> 195;
        "Auml" -> 196;
        "Aring" -> 197;
        "AElig" -> 198;
        "Ccedil" -> 199;
        "Egrave" -> 200;
        "Eacute" -> 201;
        "Ecirc" -> 202;
        "Euml" -> 203;
        "Igrave" -> 204;
        "Iacute" -> 205;
        "Icirc" -> 206;
        "Iuml" -> 207;
        "ETH" -> 208;
        "Ntilde" -> 209;
        "Ograve" -> 210;
        "Oacute" -> 211;
        "Ocirc" -> 212;
        "Otilde" -> 213;
        "Ouml" -> 214;
        "times" -> 215;
        "Oslash" -> 216;
        "Ugrave" -> 217;
        "Uacute" -> 218;
        "Ucirc" -> 219;
        "Uuml" -> 220;
        "Yacute" -> 221;
        "THORN" -> 222;
        "szlig" -> 223;
        "agrave" -> 224;
        "aacute" -> 225;
        "acirc" -> 226;
        "atilde" -> 227;
        "auml" -> 228;
        "aring" -> 229;
        "aelig" -> 230;
        "ccedil" -> 231;
        "egrave" -> 232;
        "eacute" -> 233;
        "ecirc" -> 234;
        "euml" -> 235;
        "igrave" -> 236;
        "iacute" -> 237;
        "icirc" -> 238;
        "iuml" -> 239;
        "eth" -> 240;
        "ntilde" -> 241;
        "ograve" -> 242;
        "oacute" -> 243;
        "ocirc" -> 244;
        "otilde" -> 245;
        "ouml" -> 246;
        "divide" -> 247;
        "oslash" -> 248;
        "ugrave" -> 249;
        "uacute" -> 250;
        "ucirc" -> 251;
        "uuml" -> 252;
        "yacute" -> 253;
        "thorn" -> 254;
        "yuml" -> 255;

        %% Latin Extended-B.
        "fnof" -> 402;

        %% Greek capital letters.
        "Alpha" -> 913;
        "Beta" -> 914;
        "Gamma" -> 915;
        "Delta" -> 916;
        "Epsilon" -> 917;
        "Zeta" -> 918;
        "Eta" -> 919;
        "Theta" -> 920;
        "Iota" -> 921;
        "Kappa" -> 922;
        "Lambda" -> 923;
        "Mu" -> 924;
        "Nu" -> 925;
        "Xi" -> 926;
        "Omicron" -> 927;
        "Pi" -> 928;
        "Rho" -> 929;
        "Sigma" -> 931;
        "Tau" -> 932;
        "Upsilon" -> 933;
        "Phi" -> 934;
        "Chi" -> 935;
        "Psi" -> 936;
        "Omega" -> 937;

        %% Greek small letters and variant forms.
        "alpha" -> 945;
        "beta" -> 946;
        "gamma" -> 947;
        "delta" -> 948;
        "epsilon" -> 949;
        "zeta" -> 950;
        "eta" -> 951;
        "theta" -> 952;
        "iota" -> 953;
        "kappa" -> 954;
        "lambda" -> 955;
        "mu" -> 956;
        "nu" -> 957;
        "xi" -> 958;
        "omicron" -> 959;
        "pi" -> 960;
        "rho" -> 961;
        "sigmaf" -> 962;
        "sigma" -> 963;
        "tau" -> 964;
        "upsilon" -> 965;
        "phi" -> 966;
        "chi" -> 967;
        "psi" -> 968;
        "omega" -> 969;
        "thetasym" -> 977;
        "upsih" -> 978;
        "piv" -> 982;

        %% General punctuation and letterlike symbols.
        "bull" -> 8226;
        "hellip" -> 8230;
        "prime" -> 8242;
        "Prime" -> 8243;
        "oline" -> 8254;
        "frasl" -> 8260;
        "weierp" -> 8472;
        "image" -> 8465;
        "real" -> 8476;
        "trade" -> 8482;
        "alefsym" -> 8501;

        %% Arrows.
        "larr" -> 8592;
        "uarr" -> 8593;
        "rarr" -> 8594;
        "darr" -> 8595;
        "harr" -> 8596;
        "crarr" -> 8629;
        "lArr" -> 8656;
        "uArr" -> 8657;
        "rArr" -> 8658;
        "dArr" -> 8659;
        "hArr" -> 8660;

        %% Mathematical operators.
        "forall" -> 8704;
        "part" -> 8706;
        "exist" -> 8707;
        "empty" -> 8709;
        "nabla" -> 8711;
        "isin" -> 8712;
        "notin" -> 8713;
        "ni" -> 8715;
        "prod" -> 8719;
        "sum" -> 8721;
        "minus" -> 8722;
        "lowast" -> 8727;
        "radic" -> 8730;
        "prop" -> 8733;
        "infin" -> 8734;
        "ang" -> 8736;
        "and" -> 8743;
        "or" -> 8744;
        "cap" -> 8745;
        "cup" -> 8746;
        "int" -> 8747;
        "there4" -> 8756;
        "sim" -> 8764;
        "cong" -> 8773;
        "asymp" -> 8776;
        "ne" -> 8800;
        "equiv" -> 8801;
        "le" -> 8804;
        "ge" -> 8805;
        "sub" -> 8834;
        "sup" -> 8835;
        "nsub" -> 8836;
        "sube" -> 8838;
        "supe" -> 8839;
        "oplus" -> 8853;
        "otimes" -> 8855;
        "perp" -> 8869;
        "sdot" -> 8901;

        %% Miscellaneous technical and geometric shapes, card suits.
        "lceil" -> 8968;
        "rceil" -> 8969;
        "lfloor" -> 8970;
        "rfloor" -> 8971;
        "lang" -> 9001;
        "rang" -> 9002;
        "loz" -> 9674;
        "spades" -> 9824;
        "clubs" -> 9827;
        "hearts" -> 9829;
        "diams" -> 9830;

        %% Characters significant in markup.
        "quot" -> 34;
        "amp" -> 38;
        "lt" -> 60;
        "gt" -> 62;

        %% Additional Latin letters and spacing modifiers.
        "OElig" -> 338;
        "oelig" -> 339;
        "Scaron" -> 352;
        "scaron" -> 353;
        "Yuml" -> 376;
        "circ" -> 710;
        "tilde" -> 732;

        %% Spaces, joiners, directional marks, dashes and quotes.
        "ensp" -> 8194;
        "emsp" -> 8195;
        "thinsp" -> 8201;
        "zwnj" -> 8204;
        "zwj" -> 8205;
        "lrm" -> 8206;
        "rlm" -> 8207;
        "ndash" -> 8211;
        "mdash" -> 8212;
        "lsquo" -> 8216;
        "rsquo" -> 8217;
        "sbquo" -> 8218;
        "ldquo" -> 8220;
        "rdquo" -> 8221;
        "bdquo" -> 8222;
        "dagger" -> 8224;
        "Dagger" -> 8225;
        "permil" -> 8240;
        "lsaquo" -> 8249;
        "rsaquo" -> 8250;
        "euro" -> 8364;

        %% Anything else is not a known entity name.
        _ -> undefined
    end.