/**
 * Parses the URL-host (not the url)
 * e.g. www.github.com , lb1.www.some-cool-domain.co.uk , 127.0.0.1 , 2001:0db8:0:0:0:0:1428:57ab
 *
 * Copyright:
 * (C) 2016 Martin Brzenska
 *
 * License:
 * Distributed under the terms of the MIT license.
 * Consult the provided LICENSE.md file for details
 */
module libhosttokens.host;


/**
 * A parsed hostname.
 *
 * All accessors are `const`, so they can be called on the `immutable(Host)`
 * that `parseHost` returns as well as on mutable instances.
 */
struct Host
{
  ///The original hostname.
  string host;
  ///A list of subdomains.
  string[] subdomains;
  ///The part of the domain between the subdomain and tld/ccSLD.
  string lowlevelDomain;
  ///A list of TLD or ccSLD and TLD.
  string[] reglevels = [];
  ///True if the hostname is an IP (IPv4 or IPv6).
  bool isIP;


  ///Returns: the original, unmodified hostname.
  string toString() const {
    return this.host;
  }

  ///The TLD or ccSLD.TLD (the `reglevels` joined with dots).
  @property tld() const {
    import std.array : join;
    return this.reglevels.join(".");
  }

  /**
   * The part of a hostname, that is before (right of) the subdomains.
   *
   * For IP addresses no dot is inserted between `lowlevelDomain` and the
   * (empty) tld, so the result is the address itself.
   */
  @property paylevelDomain() const {
    return (this.lowlevelDomain.length ? this.lowlevelDomain ~ ( this.isIP ? "" : ".") : "") ~ this.tld;
  }

  ///The part of a hostname, that is after (left of) the paylevelDomain.
  @property subdomain() const {
    import std.array : join;
    return this.subdomains.join(".");
  }

}

/**
 * Parses a hostname
 *
 * If `host` is accepted by `std.socket.parseAddress` it is treated as an IP
 * address: `isIP` is set and `lowlevelDomain` holds the address string
 * reported by the parsed address. Otherwise the host is split at the dots;
 * the rightmost label is the TLD, optionally preceded by a second-level
 * label that is registered for that TLD in `ccSLDs`.
 *
 * Hosts that have no label left of the registry suffix (e.g. a single
 * label) yield an empty `lowlevelDomain` and no subdomains.
 *
 * Params:
 *    host = the Hostname to be parsed
 *
 * Returns: A Host struct containing the hostname elements (subdomain , paylevelDomain , tld ...).
 */
immutable(Host) parseHost(string host) {
  import std.array : split;
  import std.algorithm.searching : canFind;
  import std.algorithm.mutation : reverse;
  import std.socket : parseAddress , Address , SocketException;

  import libhosttokens.ccSLD : ccSLDs;

  // Only the parse attempt is guarded; a SocketException here simply means
  // "not an IP literal".
  bool isIPaddr = true;
  Address addr;
  try
  {
    addr = parseAddress(host);
  }
  catch(SocketException e) {
    isIPaddr = false;
  }

  if(isIPaddr) {
    return immutable(Host)(
      host,
      [],
      addr.toAddrString(),
      [],
      true
    );
  }

  // Labels ordered from the TLD (index 0) towards the leftmost subdomain.
  auto labels = split(host , ".");
  reverse(labels);

  //Parse TLD/ccSLD
  string[] reglevels;
  // Number of labels (counted from the right) that form the registry suffix.
  size_t suffixLabels = 0;
  if(labels.length) {
    immutable string top = labels[0];
    reglevels ~= top;
    suffixLabels = 1;

    // NOTE(review): ccSLDs is declared in another module; it is used here
    // exactly as before (`in` lookup and indexing by TLD). The value is
    // assumed to be a range of second-level labels - confirm.
    // canFind is required: the range returned by `find` does not test as
    // false when nothing was found.
    if(labels.length > 1 && top in ccSLDs && ccSLDs[top].canFind(labels[1])) {
      reglevels ~= labels[1];
      suffixLabels = 2;
    }
  }
  reverse(reglevels);

  //Paydomain
  string lowlevelDomain;
  if(suffixLabels < labels.length) {
    lowlevelDomain = labels[suffixLabels];
  }

  //Subdomains
  string[] subdomains;
  if(suffixLabels + 1 < labels.length) {
    subdomains = labels[suffixLabels + 1 .. $];
    // Restore the left-to-right order of the original hostname.
    reverse(subdomains);
  }

  return immutable(Host)(
    host,
    subdomains.idup,
    lowlevelDomain,
    reglevels.idup,
    false
  );
}

unittest {

  {
    auto host = parseHost("profil.mab-on.net");
    assert(host.lowlevelDomain == "mab-on");
    assert(host.tld == "net");
    assert(host.subdomain == "profil");
    assert(host.paylevelDomain == "mab-on.net");
  }

  {
    auto host = parseHost("www.amazon.co.uk");
    assert(host.lowlevelDomain == "amazon");
    assert(host.tld == "co.uk");
    assert(host.subdomain == "www");
    assert(host.paylevelDomain == "amazon.co.uk");
  }

  {
    auto host = parseHost("www.herts.police.uk");
    assert(host.lowlevelDomain == "herts");
    assert(host.tld == "police.uk");
    assert(host.subdomain == "www");
    assert(host.paylevelDomain == "herts.police.uk");
  }

  {
    auto host = parseHost("www.ub.uni-koeln.de");
    assert(host.lowlevelDomain == "uni-koeln");
    assert(host.tld == "de");
    assert(host.subdomain == "www.ub");
    assert(host.paylevelDomain == "uni-koeln.de");
  }

  {
    //A label that is not a registered second-level domain must not become part of the tld.
    auto host = parseHost("www.some-cool-domain.uk");
    assert(host.lowlevelDomain == "some-cool-domain" , host.lowlevelDomain);
    assert(host.tld == "uk");
    assert(host.subdomain == "www");
    assert(host.paylevelDomain == "some-cool-domain.uk");
  }

  {
    //A single label has nothing left of the tld; this must not throw.
    auto host = parseHost("localhost");
    assert(host.lowlevelDomain == "");
    assert(host.tld == "localhost");
    assert(host.subdomain == "");
    assert(host.paylevelDomain == "localhost");
  }

  {
    auto host = parseHost("127.0.0.1");
    assert(host.lowlevelDomain == "127.0.0.1" , host.lowlevelDomain);
    assert(host.tld == "");
    assert(host.subdomain == "");
    assert(host.paylevelDomain == "127.0.0.1");
  }

  {
    import std.format : format;
    auto host = parseHost("2001:0db8:85a3:08d3:1319:8a2e:0370:7344");
    assert(host.lowlevelDomain == "2001:db8:85a3:8d3:1319:8a2e:370:7344" , host.lowlevelDomain);
    //Note, that Host.host is the original IPv6 String - other properties shorten the address.
    assert(format("%s",host) == "2001:0db8:85a3:08d3:1319:8a2e:0370:7344");
    assert(host.tld == "");
    assert(host.subdomain == "");
    assert(host.paylevelDomain == "2001:db8:85a3:8d3:1319:8a2e:370:7344");
  }
}