opensextant.extractors.xcoord
# coding: utf-8

import arrow
from pygeodesy.mgrs import Mgrs
from pygeodesy.utm import Utm

from opensextant import Coordinate
from opensextant.FlexPat import PatternExtractor, RegexPatternManager, PatternMatch


class ResolutionUncertainty:
    """Named uncertainty levels for a coordinate (units not stated here; presumably meters -- TODO confirm)."""
    UNKNOWN = 100000
    REGIONAL = 50000
    LOCAL = 5000
    SITE = 1000
    SPOT = 100
    GPS = 10


class Specificity:
    """Ordered levels of detail of a parsed ordinate; higher value means finer detail."""
    DEG = 1
    SUBDEG = 2
    MINUTE = 3
    SUBMINUTE = 4
    SECOND = 5
    SUBSECOND = 6


# Maps a hemisphere symbol to its sign; a missing symbol (None) is treated as positive.
HEMISPHERES = {
    "-": -1,
    "W": -1,
    "S": -1,
    "+": 1,
    "E": 1,
    "N": 1,
    None: 1
}

# Module-wide minimum specificity used by GeocoordMatch.filter_by_resolution().
# It is overwritten each time an XCoord extractor is constructed.
default_specificity = Specificity.SUBDEG


# History - 2024 may - MCU ported from XCoord Java
#
class XCoord(PatternExtractor):
    """
    NOTE: a port of XCoord java (org.opensextant.extractors.xcoord, in Xponents-Core)
    """

    def __init__(self, cfg="geocoord_patterns_py.cfg", debug=False, specificity=Specificity.SUBDEG):
        """
        :param cfg: patterns config file.
        :param debug: passed to the pattern manager as both ``debug`` and ``testing``.
        :param specificity: the abstract level of resolution for validating coordinates, e.g., DEGREE, SUB-DEGREE, etc.
          use Specificity enumeration
        """
        PatternExtractor.__init__(self, RegexPatternManager(cfg, debug=debug, testing=debug))
        # NOTE: this is module-level state, shared by every extractor instance.
        global default_specificity
        default_specificity = specificity


def hemisphere_factor(sym: str) -> int:
    """
    :param sym: hemisphere symbol, e.g., "N", "w", "+", "-"; may be None or empty.
    :return: +1 or -1 for a known symbol, +1 when no symbol is given, and None for an unrecognized symbol.
    """
    if sym:
        return HEMISPHERES.get(sym.upper())
    return HEMISPHERES.get(None)


def one_value(*args):
    """
    :param args: candidate values
    :return: first non-null value, or None if there is none.
    """
    for val in args:
        if val is not None:
            return val
    return None


def is_blank(txt: str):
    """
    :return: True if txt is an empty or whitespace-only string; False otherwise, including for None.
    """
    if txt is None:
        # Sorry -- you have to determine if obj is string or not first. None does not count.
        return False
    return txt == '' or txt.strip() == ''


def strip(txt: str):
    """
    :return: txt with surrounding whitespace removed; False (not None) when txt is None.
    """
    if txt is None:
        # Sorry -- you have to determine if obj is string or not first. None does not count.
        return False
    return txt.strip()


class Hemisphere:
    """Hemisphere symbol and polarity (+1/-1) for one axis ("lat" or "lon"), read from pattern slots."""

    # Slot names that may carry the hemisphere symbol, per axis.
    _SLOT_NAMES = {
        "lon": ("hemiLon", "hemiLonSign", "hemiLonPre"),
        "lat": ("hemiLat", "hemiLatSign", "hemiLatPre"),
    }

    def __init__(self, axis, slots=None):
        self.axis = axis
        self.symbol = None
        # 0 means "not resolved"; normalize() sets +1 or -1 when slots are given.
        self.polarity = 0
        self.slots = slots
        self.normalize()

    def is_alpha(self) -> bool:
        """
        :return: True only if a symbol is present and is alphabetic (e.g., "N", not "+").
        """
        # bool() so the declared return type holds even when symbol is None or "".
        return bool(self.symbol) and self.symbol.isalpha()

    def standard_format(self) -> str:
        """
        Caution -- test for presence of symbol first, as decimal value without hemisphere may not be geo coord at all.

        :return: "+" for non-negative polarity, "-" otherwise.
        """
        return "+" if self.polarity >= 0 else "-"

    def normalize(self):
        """
        Resolve symbol and polarity from the slots for this axis.
        With no symbol present for a lat/lon axis the polarity defaults to +1.
        """
        if not self.slots:
            return

        for slot in self._SLOT_NAMES.get(self.axis, ()):
            value = self.slots.get(slot)
            # Only a non-empty value counts; an empty or None slot must not erase a symbol already found.
            if value:
                self.symbol = value

        if not self.symbol:
            if self.axis in self._SLOT_NAMES:
                self.polarity = 1
            return

        self.symbol = self.symbol.upper().strip()
        self.polarity = hemisphere_factor(self.symbol)


class DMSOrdinate:
    """One ordinate (latitude or longitude) parsed from pattern slots into degrees, minutes and seconds."""

    SYMBOLS = {"°", "º", "'", "\"", ":", "lat", "lon", "geo", "coord", "deg"}

    def __init__(self, axis: str, text: str, fam: str, slots=None):
        """
        :param axis: "lat" or "lon"
        :param text: matched text
        :param fam: pattern family, e.g., "DMS"
        :param slots: dict of named fields from the pattern match
        """
        self.axis = axis
        self.text = text
        self.pattern_family = fam
        self.slots = slots
        self.degrees = None
        self.min = None
        self.seconds = None
        self.hemi = None
        self.symbols = set()
        self.normalized_slots = dict()
        self.resolution = ResolutionUncertainty.UNKNOWN
        self.specificity = Specificity.DEG
        self.normalize()

    def is_valid(self):
        """
        :return: True if degrees are present and strictly within range for the axis,
          and any minutes/seconds are within [0, 60).
        """
        if self.degrees is None:
            return False
        # Must have degrees, in range for the axis
        if self.axis == "lat":
            if not -90 < self.degrees < 90:
                return False
        if self.axis == "lon":
            if not -180 < self.degrees < 180:
                return False
        # Min and Secs must be in range if specified
        if self.min is not None and not 0 <= self.min < 60:
            return False
        if self.seconds is not None and not 0 <= self.seconds < 60:
            return False

        return True

    def has_minutes(self):
        """
        :return: True if minutes were parsed (0 counts) and the specificity is MINUTE or SUBMINUTE.
        """
        return self.min is not None and self.specificity in (Specificity.MINUTE, Specificity.SUBMINUTE)

    def has_submin(self):
        return self.specificity == Specificity.SUBMINUTE

    def has_seconds(self):
        """
        :return: True if seconds were parsed (0 counts) and the specificity is SECOND or SUBSECOND.
        """
        return self.seconds is not None and self.specificity in (Specificity.SECOND, Specificity.SUBSECOND)

    def has_subsec(self):
        return self.specificity == Specificity.SUBSECOND

    def has_symbols(self):
        return len(self.symbols) > 0

    def normalize(self):
        """
        Parse all slots for the pattern, normalizing found items as both
        string and numeric representation.  That is, the string portion of the value should be preserved
        to avoid inserting additional precision not present in the value. e.g., "30.44" is 2-sig-figs, and not
        "30.4400001" or whatever artifacts come with floating point computation.

        separators and symbols present are useful in post-match processing/filtering to weed out false positives.
        """
        if not self.slots:
            return
        txtnorm = self.text.lower()
        for sym in DMSOrdinate.SYMBOLS:
            if sym in txtnorm:
                self.symbols.add(sym)

        self.hemi = Hemisphere(self.axis, slots=self.slots)
        if self.axis == "lat":
            self.digest_lat()
        elif self.axis == "lon":
            self.digest_lon()

    def decimal(self):
        """
        :return: signed decimal degrees from the parsed parts, or None if degrees are missing.
        :raise Exception: if a hemisphere object exists but its polarity was not resolved.
        """
        pol = 1
        if self.hemi:
            # Validity check of presence of Hemisphere symbol is separate.
            pol = self.hemi.polarity
            if not pol:
                raise Exception("logic error - hemisphere was not resolved")

        # Use the finest parts available; out-of-range seconds or minutes fall through to the coarser form.
        if self.seconds is not None and self.min is not None and self.degrees is not None:
            if self.seconds < 60:
                return pol * (self.degrees + self.min / 60 + self.seconds / 3600)
        if self.min is not None and self.degrees is not None:
            if self.min < 60:
                return pol * (self.degrees + self.min / 60)
        if self.degrees is not None:
            return pol * self.degrees
        return None

    def digest_lat(self):
        self._digest_slots("Lat")

    def digest_lon(self):
        self._digest_slots("Lon")

    def _digest_slots(self, axis):
        """
        Fields or slots are named xxxLatxx or xxxLonxx

        :param axis: "Lat" or "Lon", as spelled inside the slot names.
        """
        if self.pattern_family == "DMS":
            min_sec_sep = self.slots.get(f"ms{axis}Sep")
            deg_min_sep = self.slots.get(f"dm{axis}Sep")
            if min_sec_sep and deg_min_sep and min_sec_sep == "." and min_sec_sep != deg_min_sep:
                # valid coordinate, but separators like "DD MM.ss" suggest more DM pattern
                # whereas "DD.MM.SS" with consistent separators is DMS.
                return

        # DEGREES
        deg = self.get_int(f"deg{axis}", "deg")
        deg2 = self.get_int(f"dmsDeg{axis}", "deg")
        deg3 = self.get_decimal(f"decDeg{axis}", "deg")
        self.degrees = one_value(deg, deg2, deg3)
        if self.degrees is None:
            return
        self.specificity = Specificity.DEG
        if deg3 is not None:
            self.specificity = Specificity.SUBDEG

        # MINUTES
        minutes = self.get_int(f"min{axis}", "min")
        minutes2 = self.get_int(f"dmsMin{axis}", "min")
        minutes3 = self.get_decimal(f"decMin{axis}", "min")
        mindash = self.get_decimal(f"decMin{axis}3", "min")

        self.min = one_value(minutes, minutes2, minutes3, mindash)
        if self.min is None:
            return
        self.specificity = Specificity.MINUTE

        min_fract = self.get_fractional(f"fractMin{axis}", "fmin")
        # variation 2, is a 3-digit or longer fraction
        min_fract2 = self.get_fractional(f"fractMin{axis}3", "fmin")

        fmin = one_value(min_fract, min_fract2)
        if fmin is not None:
            self.specificity = Specificity.SUBMINUTE
            self.min += fmin

        # SECONDS
        sec = self.get_int(f"sec{axis}", "sec")
        sec2 = self.get_int(f"dmsSec{axis}", "sec")
        self.seconds = one_value(sec, sec2)
        if self.seconds is None:
            return
        self.specificity = Specificity.SECOND

        fsec = self.get_fractional(f"fractSec{axis}", "fsec")
        fsec2 = self.get_fractional(f"fractSec{axis}Opt", "fsec")
        fseconds = one_value(fsec, fsec2)
        if fseconds is not None:
            self.specificity = Specificity.SUBSECOND
            self.seconds += fseconds

    def get_int(self, f, fnorm):
        """
        :param f: slot name
        :param fnorm: key under which the raw text is kept in normalized_slots
        :return: int value of the slot; None if the slot is absent or has no value.
        """
        if f in self.slots:
            val = self.slots[f]
            if val is None:
                # Slot is declared but carries no value.
                return None
            self.normalized_slots[fnorm] = val
            return int(val)
        return None

    def get_decimal(self, f, fnorm):
        """
        find slot and convert to float; any "-" in the text is treated as the decimal point.

        :return: float value of the slot; None if the slot is absent or has no value.
        """
        if f in self.slots:
            val = self.slots[f]
            if val is None:
                # Slot is declared but carries no value.
                return None
            if "-" in val:
                val = val.replace("-", ".")
            self.normalized_slots[fnorm] = val
            return float(val)
        return None

    def get_fractional(self, f, fnorm):
        """
        find slot and convert pattern "-dddd..." to 0.dddd...
        Also if fraction is simply "dddd..." then insert "." at front.

        :return: fraction as float; None if the slot is absent or empty.
        """
        if f in self.slots:
            val = self.slots[f]
            if not val:
                return None
            if val.startswith("-"):
                val = val.replace("-", ".")
            elif not val.startswith("."):
                val = f".{val}"
            self.normalized_slots[fnorm] = val
            return float(val)
        return None


class GeocoordMatch(PatternMatch):
    """Base class for coordinate matches; holds parsed ordinates or a geodetic object and the resulting Coordinate."""

    def __init__(self, *args, **kwargs):
        PatternMatch.__init__(self, *args, **kwargs)
        self.case = PatternMatch.UPPER_CASE
        self.geodetic = None
        self.coordinate: Coordinate = None
        self.parsing_err: str = None
        self.lat_ordinate: DMSOrdinate = None
        self.lon_ordinate: DMSOrdinate = None
        self.filter: GeocoordFilter = None
        # Family is the pattern id up to the first "-".
        self.pattern_family = self.pattern_id.split("-", 1)[0]

    def __str__(self):
        return f"{self.text}"

    def normalize(self):
        PatternMatch.normalize(self)
        # Start pessimistic; subclasses flip these once parsing and validation succeed.
        self.is_valid = False
        self.filtered_out = True

    def _make_coordinate(self):
        """
        Build self.coordinate from the lat/lon ordinates if both are set, otherwise from self.geodetic.
        Sets is_valid; the geodetic path also clears filtered_out.
        """
        if self.lat_ordinate and self.lon_ordinate:
            # Both ordinates must be valid.
            self.is_valid = self.lat_ordinate.is_valid() and self.lon_ordinate.is_valid()
            if self.is_valid:
                # continue to weed out noise.
                self.coordinate = Coordinate(None,
                                             lat=self.lat_ordinate.decimal(),
                                             lon=self.lon_ordinate.decimal())
                self.is_valid = self.coordinate.validate()
        elif self.geodetic:
            self.is_valid = True
            self.filtered_out = False
            LL = self.geodetic.toLatLon()
            self.coordinate = Coordinate(None, lat=LL.lat, lon=LL.lon)
            # These are parsed by UTM and MGRS libraries, so coordinate is assumed valid.

    def filter_by_resolution(self):
        """ Check specificity of ordinates -- as parsed, if both LAT and LON have the minimum level of detail

        40N       -- could be "40 North"
        +40.0000  -- also "40 North", but precision is specified to 4 decimal places.
        +40:00:00 -- well, could also be an hour marker ~ 40 hours

        :return: TRUE if coordinate is specific and resolution is high enough.
        """
        if not self.lat_ordinate or not self.lon_ordinate:
            # If unset, we'll simply filter OUT
            return False
        lat_valid = self.lat_ordinate.specificity >= default_specificity
        lon_valid = self.lon_ordinate.specificity >= default_specificity
        return lat_valid and lon_valid


class GeocoordFilter:
    """Filter interface: filter_out() returns a (filtered_out, reason) tuple."""

    def filter_out(self, m: GeocoordMatch) -> tuple:
        return False, "reason"


class MGRSFilter(GeocoordFilter):
    """Lexical and date-based false-positive filter for MGRS matches."""

    def __init__(self):
        GeocoordFilter.__init__(self)
        self.date_formats = ["DDMMMYYYY", "DMMMYYHHmm", "DDMMMYYHHmm", "DDMMMYY", "DMMMYY", "HHZZZYYYY"]
        self.sequences = ["1234", "123456", "12345678", "1234567890"]
        self.stop_terms = {"PER", "SEC", "UTC", "GMT", "GAL", "USC", "CAN",
                           "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
                           "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"}
        # "Now" is captured once, when the filter is constructed.
        self.today = arrow.utcnow()
        self.YEAR = self.today.date().year
        self.YY = self.YEAR - 2000
        self.RECENT_YEAR_THRESHOLD = 30

    def filter_out(self, mgrs: GeocoordMatch) -> tuple:
        """
        :return: (True, reason) if filtered out as a false positive; (False, None) otherwise.
        """
        # MGRS rules:  upper case alphanumeric, greater than 6 chars,
        # subjective:
        #   - is not a digit sequence;
        #   - is not a recent date;
        #   - is not a rate ('NNN per LB');
        #   - is not time with 'sec'
        # Lexical filters:
        if not mgrs.is_valid:
            # parsed earlier as invalid.
            return True, "invalid"

        if not (mgrs.text.isupper() and len(mgrs.text.replace(" ", "")) > 6):
            return True, "lexical"
        if "\t" in mgrs.text or "\n" in mgrs.text:
            return True, "format-ws"
        for term in self.stop_terms:
            if term in mgrs.textnorm:
                return True, "measure"
        for seq in self.sequences:
            if seq in mgrs.textnorm:
                return True, "digit-seq"

        # Date Filter
        for fmt in self.date_formats:
            date_test = mgrs.textnorm[0:len(fmt)]
            try:
                dt = arrow.get(date_test, fmt)
            except Exception:
                # Not parseable with this format; best-effort, try the next one.
                continue
            if self._is_recent(dt):
                return True, "date"

        # Not filtered out
        return False, None

    def _is_recent(self, dt: arrow.Arrow):
        """
        checks if the year of dt is within RECENT_YEAR_THRESHOLD years of the year this filter was created.
        """
        return abs(dt.date().year - self.YEAR) <= self.RECENT_YEAR_THRESHOLD


class DMSFilter(GeocoordFilter):
    """Resolution and date-format false-positive filter for degree-minute-second matches."""

    def __init__(self):
        GeocoordFilter.__init__(self)
        self.date_formats = ["YY-DD-MM HH:mm:ss", "MM-DD-YY HH:mm:ss"]

    def filter_out(self, dms: GeocoordMatch) -> tuple:
        """
        Easy filter -- if punctuation matches, this is an easy pattern to ignore.

        :return: (True, reason) if filtered out as a false positive; (False, None) otherwise.
        """
        if not dms.is_valid:
            # Filter out. invalid.
            return True, "invalid"

        if not dms.filter_by_resolution():
            # Not meeting the user's level of specificity.
            # Callers unpack two values, so this must be a tuple like every other return.
            return True, "resolution"

        if dms.text[0].isalpha():
            return False, None

        for fmt in self.date_formats:
            try:
                arrow.get(dms.text, fmt)
            except Exception:
                # Not a date in this format; best-effort, try the next one.
                continue
            # Recency matters not.  Tests are literal date formats
            return True, "date"

        # Not filtered. Is valid.
        return False, None


mgrs_filter = MGRSFilter()
dms_filter = DMSFilter()


class MGRSMatch(GeocoordMatch):
    """Match for an MGRS pattern; parsed with pygeodesy Mgrs and screened by MGRSFilter."""

    def __init__(self, *args, **kwargs):
        GeocoordMatch.__init__(self, *args, **kwargs)
        self.filter = mgrs_filter

    def validate(self):
        self.filtered_out, self.parsing_err = self.filter.filter_out(self)

    def normalize(self):
        """
        Parse zone, quad and easting/northing slots into a geodetic object and coordinate, then validate.
        Any parsing failure is recorded in parsing_err and the match stays filtered out.
        """
        GeocoordMatch.normalize(self)
        slots = self.attributes()
        self.textnorm = self.textnorm.replace(" ", "")

        z = slots.get("MGRSZone")
        q = slots.get("MGRSQuad")
        east_north = slots.get("Easting_Northing")
        if east_north is None:
            # Nothing to parse; leave the match invalid and filtered out.
            self.parsing_err = "missing Easting_Northing"
            return

        e, n = None, None
        if " " in east_north:
            e, n = east_north.split(" ", 1)
            le = len(e)
            ln = len(n)
            if le != ln:
                # Unequal digit counts: truncate both to the shorter length.
                resolution = min(le, ln)
                e = e[:resolution]
                n = n[:resolution]
        elif len(east_north) % 2 == 0:
            # No separator: an even-length digit string splits in half.
            resolution = len(east_north) // 2
            e, n = east_north[0:resolution], east_north[resolution:]

        if e and n:
            try:
                e = int(e)
                n = int(n)
                self.geodetic = Mgrs(zone=z, EN=q, easting=e, northing=n)
                self._make_coordinate()
                self.validate()
            except Exception as err:
                self.parsing_err = str(err)


class UTMMatch(GeocoordMatch):
    """Match for a UTM pattern; parsed with pygeodesy Utm."""

    def __init__(self, *args, **kwargs):
        GeocoordMatch.__init__(self, *args, **kwargs)

    def normalize(self):
        """
        Parse zone, band, easting and northing slots into a geodetic object and coordinate.
        Any parsing failure is recorded in parsing_err.
        """
        GeocoordMatch.normalize(self)
        slots = self.attributes()

        z = slots.get("UTMZone")
        z1 = slots.get("UTMZoneZZ")  # 0-5\d
        z2 = slots.get("UTMZoneZ")  # \d

        try:
            ZZ = int(one_value(z, z1, z2))
            band = slots.get("UTMBand")
            if not band:
                return

            hemi = band[0]
            e = slots.get("UTMEasting")
            n = slots.get("UTMNorthing")
            if e and n:
                self.geodetic = Utm(zone=ZZ, hemisphere=hemi, band=band, easting=int(e), northing=int(n))
                self._make_coordinate()
        except Exception as err:
            self.parsing_err = str(err)


class DegMinMatch(GeocoordMatch):
    """Match for degree-minute (DM) patterns."""

    def __init__(self, *args, **kwargs):
        GeocoordMatch.__init__(self, *args, **kwargs)

    def validate(self):
        """Invalidate the match when the lat and lon degree-minute separators differ; then set filtered_out."""
        slots = self.attributes()
        if self.is_valid:
            # Punct - separators must match for DM patterns.
            lat_sep = strip(slots.get("dmLatSep"))
            lon_sep = strip(slots.get("dmLonSep"))
            if (lat_sep or lon_sep) and (lat_sep != lon_sep):
                self.is_valid = False

        # TODO: evaluate other dashes:  GeocoordNormalization eval dashes, eval punct
        self.filtered_out = not self.is_valid

    def normalize(self):
        GeocoordMatch.normalize(self)

        # DEG MIN (DM) patterns, with fractional min
        #
        # dmsDegLat > [-\s]? < dmsMinLat > < hemiLat > < fractMinLat > < latlonSep > < dmsDegLon > [-\s]? < dmsMinLon > < hemiLon > < fractMinLon
        #
        # degLat > < dmLatSep >\s? < minLat > < fractMinLat >? < msLatSep >?\s? < hemiLat > < latlonSep3 > < degLon > < dmLonSep >\s? < minLon > < fractMinLon
        #
        # < hemiLatPre >\s? < degLat > < dmLatSep >\s? < minLat > < fractMinLat >? < msLatSep >? < latlonSep3 >
        #      < hemiLonPre >\s? < degLon > < dmLonSep >\s? < minLon > < fractMinLon >? < msLonSep >?

        # TODO: conditions that invalidate this pattern?
        self.lat_ordinate = DMSOrdinate("lat", self.text, self.pattern_family, slots=self.attributes())
        self.lon_ordinate = DMSOrdinate("lon", self.text, self.pattern_family, slots=self.attributes())
        self._make_coordinate()
        self.validate()


class DegMinSecMatch(GeocoordMatch):
    """Match for degree-minute-second (DMS) patterns; screened by DMSFilter."""

    def __init__(self, *args, **kwargs):
        GeocoordMatch.__init__(self, *args, **kwargs)
        self.filter = dms_filter

    def normalize(self):
        GeocoordMatch.normalize(self)
        self.lat_ordinate = DMSOrdinate("lat", self.text, self.pattern_family, slots=self.attributes())
        self.lon_ordinate = DMSOrdinate("lon", self.text, self.pattern_family, slots=self.attributes())
        self._make_coordinate()
        self.validate()

    def validate(self):
        self.filtered_out, self.parsing_err = self.filter.filter_out(self)


class DecimalDegMatch(GeocoordMatch):
    """Match for decimal-degree patterns."""

    def __init__(self, *args, **kwargs):
        GeocoordMatch.__init__(self, *args, **kwargs)

    def validate(self):
        """
        Validate a parsed coordinate.
        55.60, 80.11  -- not valid
        N55.60, W80.11  -- valid
        +55.60, -80.11  -- valid
        S20 E33 -- not valid, by default looking for sub-degree resolution.

        Validate also if the coordinate is a valid range for Lat/Lon.
        """
        if not self.is_valid:
            return

        lath = self.lat_ordinate.hemi
        lonh = self.lon_ordinate.hemi
        valid_hemi = lath and lonh and lath.is_alpha() and lonh.is_alpha()
        valid_sym = self.lat_ordinate.has_symbols() or self.lon_ordinate.has_symbols()
        self.is_valid = (valid_hemi or valid_sym) and self.filter_by_resolution()

        self.filtered_out = not self.is_valid

    def normalize(self):
        GeocoordMatch.normalize(self)
        self.lat_ordinate = DMSOrdinate("lat", self.text, self.pattern_family, slots=self.attributes())
        self.lon_ordinate = DMSOrdinate("lon", self.text, self.pattern_family, slots=self.attributes())
        self._make_coordinate()
        self.validate()
class XCoord(PatternExtractor):
    """
    Coordinate extractor; a port of XCoord java (org.opensextant.extractors.xcoord, in Xponents-Core).
    """

    def __init__(self, cfg="geocoord_patterns_py.cfg", debug=False, specificity=Specificity.SUBDEG):
        """
        :param cfg: patterns config file.
        :param debug: forwarded to the pattern manager as both ``debug`` and ``testing``.
        :param specificity: minimum level of resolution for validating coordinates;
          use the Specificity enumeration.
        """
        global default_specificity
        manager = RegexPatternManager(cfg, debug=debug, testing=debug)
        PatternExtractor.__init__(self, manager)
        # Module-level setting: applies to every match validated afterwards.
        default_specificity = specificity
NOTE: a port of XCoord java (org.opensextant.extractors.xcoord, in Xponents-Core)
def __init__(self, cfg="geocoord_patterns_py.cfg", debug=False, specificity=Specificity.SUBDEG):
    """
    :param cfg: patterns config file.
    :param debug: forwarded to the pattern manager as both ``debug`` and ``testing``.
    :param specificity: minimum level of resolution for validating coordinates;
      use the Specificity enumeration.
    """
    global default_specificity
    manager = RegexPatternManager(cfg, debug=debug, testing=debug)
    PatternExtractor.__init__(self, manager)
    # Module-level setting: applies to every match validated afterwards.
    default_specificity = specificity
:param cfg: patterns config file. :param specificity: the abstract level of resolution for validating coordinates, e.g., DEGREE, SUB-DEGREE, etc. use Specificity enumeration
Inherited Members
def one_value(*args):
    """
    :param args: candidate values, in priority order.
    :return: the first argument that is not None, or None when every argument is None.
    """
    return next((candidate for candidate in args if candidate is not None), None)
:param args: :return: first non-null value.
class GeocoordMatch(PatternMatch):
    """Base class for coordinate matches; holds parsed ordinates or a geodetic object and the resulting Coordinate."""

    def __init__(self, *args, **kwargs):
        PatternMatch.__init__(self, *args, **kwargs)
        self.case = PatternMatch.UPPER_CASE
        self.geodetic = None
        self.coordinate: Coordinate = None
        self.parsing_err: str = None
        self.lat_ordinate: DMSOrdinate = None
        self.lon_ordinate: DMSOrdinate = None
        self.filter: GeocoordFilter = None
        # Family is the pattern id up to the first "-".
        self.pattern_family = self.pattern_id.split("-", 1)[0]

    def __str__(self):
        return f"{self.text}"

    def normalize(self):
        PatternMatch.normalize(self)
        # Start pessimistic; subclasses flip these once parsing and validation succeed.
        self.is_valid = False
        self.filtered_out = True

    def _make_coordinate(self):
        """
        Build self.coordinate from the lat/lon ordinates if both are set, otherwise from self.geodetic.
        Sets is_valid; the geodetic path also clears filtered_out.
        """
        if self.lat_ordinate and self.lon_ordinate:
            # Both ordinates must be valid.
            self.is_valid = self.lat_ordinate.is_valid() and self.lon_ordinate.is_valid()
            if self.is_valid:
                # continue to weed out noise.
                self.coordinate = Coordinate(None,
                                             lat=self.lat_ordinate.decimal(),
                                             lon=self.lon_ordinate.decimal())
                self.is_valid = self.coordinate.validate()
        elif self.geodetic:
            self.is_valid = True
            self.filtered_out = False
            LL = self.geodetic.toLatLon()
            self.coordinate = Coordinate(None, lat=LL.lat, lon=LL.lon)
            # These are parsed by UTM and MGRS libraries, so coordinate is assumed valid.

    def filter_by_resolution(self):
        """ Check specificity of ordinates -- as parsed, if both LAT and LON have the minimum level of detail

        40N       -- could be "40 North"
        +40.0000  -- also "40 North", but precision is specified to 4 decimal places.
        +40:00:00 -- well, could also be an hour marker ~ 40 hours

        :return: TRUE if coordinate is specific and resolution is high enough.
        """
        if not self.lat_ordinate or not self.lon_ordinate:
            # If unset, we'll simply filter OUT
            return False
        lat_valid = self.lat_ordinate.specificity >= default_specificity
        lon_valid = self.lon_ordinate.specificity >= default_specificity
        return lat_valid and lon_valid
A general Pattern-based TextMatch. This Python variation consolidates PoliMatch (patterns-of-life = poli) ideas in the Java API.
def normalize(self):
    """Run the base normalization, then mark the match invalid and filtered out until a subclass proves otherwise."""
    PatternMatch.normalize(self)
    self.filtered_out = True
    self.is_valid = False
Optional, but recommended routine to normalize the matched data. That is, parse fields, uppercase, streamline punctuation, etc. As well, given such normalization result, this is the opportunity to additionally validate the match. :return:
def filter_by_resolution(self):
    """ Check that both parsed ordinates carry at least the configured minimum level of detail.

    40N       -- could be "40 North"
    +40.0000  -- also "40 North", but written with four decimal places.
    +40:00:00 -- well, could also be an hour marker ~ 40 hours

    :return: TRUE if both LAT and LON specificity meet the module-level default_specificity;
      FALSE otherwise, including when either ordinate is unset.
    """
    if self.lat_ordinate and self.lon_ordinate:
        minimum = default_specificity
        return all(ordinate.specificity >= minimum
                   for ordinate in (self.lat_ordinate, self.lon_ordinate))
    # An unset ordinate is simply filtered OUT.
    return False
Check specificity of ordinates -- as parsed, whether both LAT and LON have the minimum level of detail
40N -- could be "40 North"
+40.0000 -- also "40 North", but precision is specified to 4 decimal places.
+40:00:00 -- well, could also be an hour marker ~ 40 hours
:return: TRUE if coordinate is specific and resolution is high enough.
class MGRSMatch(GeocoordMatch):
    """Match for an MGRS pattern; parsed with pygeodesy Mgrs and screened by the shared MGRS filter."""

    def __init__(self, *args, **kwargs):
        GeocoordMatch.__init__(self, *args, **kwargs)
        self.filter = mgrs_filter

    def validate(self):
        self.filtered_out, self.parsing_err = self.filter.filter_out(self)

    def normalize(self):
        """
        Parse zone, quad and easting/northing slots into a geodetic object and coordinate, then validate.
        Any parsing failure is recorded in parsing_err and the match stays filtered out.
        """
        GeocoordMatch.normalize(self)
        slots = self.attributes()
        self.textnorm = self.textnorm.replace(" ", "")

        z = slots.get("MGRSZone")
        q = slots.get("MGRSQuad")
        east_north = slots.get("Easting_Northing")
        if east_north is None:
            # Nothing to parse; leave the match invalid and filtered out.
            self.parsing_err = "missing Easting_Northing"
            return

        e, n = None, None
        if " " in east_north:
            e, n = east_north.split(" ", 1)
            le = len(e)
            ln = len(n)
            if le != ln:
                # Unequal digit counts: truncate both to the shorter length.
                resolution = min(le, ln)
                e = e[:resolution]
                n = n[:resolution]
        elif len(east_north) % 2 == 0:
            # No separator: an even-length digit string splits in half.
            resolution = len(east_north) // 2
            e, n = east_north[0:resolution], east_north[resolution:]

        if e and n:
            try:
                e = int(e)
                n = int(n)
                self.geodetic = Mgrs(zone=z, EN=q, easting=e, northing=n)
                self._make_coordinate()
                self.validate()
            except Exception as err:
                self.parsing_err = str(err)
A general Pattern-based TextMatch. This Python variation consolidates PoliMatch (patterns-of-life = poli) ideas in the Java API.
def normalize(self):
    """
    Parse zone, quad and easting/northing slots into a geodetic object and coordinate, then validate.
    Any parsing failure is recorded in parsing_err and the match stays filtered out.
    """
    GeocoordMatch.normalize(self)
    slots = self.attributes()
    self.textnorm = self.textnorm.replace(" ", "")

    z = slots.get("MGRSZone")
    q = slots.get("MGRSQuad")
    east_north = slots.get("Easting_Northing")
    if east_north is None:
        # Nothing to parse; leave the match invalid and filtered out.
        self.parsing_err = "missing Easting_Northing"
        return

    e, n = None, None
    if " " in east_north:
        e, n = east_north.split(" ", 1)
        le = len(e)
        ln = len(n)
        if le != ln:
            # Unequal digit counts: truncate both to the shorter length.
            resolution = min(le, ln)
            e = e[:resolution]
            n = n[:resolution]
    elif len(east_north) % 2 == 0:
        # No separator: an even-length digit string splits in half.
        resolution = len(east_north) // 2
        e, n = east_north[0:resolution], east_north[resolution:]

    if e and n:
        try:
            e = int(e)
            n = int(n)
            self.geodetic = Mgrs(zone=z, EN=q, easting=e, northing=n)
            self._make_coordinate()
            self.validate()
        except Exception as err:
            self.parsing_err = str(err)
Optional, but recommended routine to normalize the matched data. That is, parse fields, uppercase, streamline punctuation, etc. As well, given such normalization result, this is the opportunity to additionally validate the match. :return:
class UTMMatch(GeocoordMatch):
    """Match for a UTM pattern; parsed with pygeodesy Utm."""

    def __init__(self, *args, **kwargs):
        GeocoordMatch.__init__(self, *args, **kwargs)

    def normalize(self):
        """
        Read zone, band, easting and northing slots and build the geodetic object and coordinate.
        Nothing is built when the band or either of easting/northing is missing; errors land in parsing_err.
        """
        GeocoordMatch.normalize(self)
        fields = self.attributes()

        # Zone may arrive under any of three slot names (UTMZoneZZ: 0-5\d, UTMZoneZ: \d).
        zone_candidates = (fields.get("UTMZone"), fields.get("UTMZoneZZ"), fields.get("UTMZoneZ"))

        try:
            zone = int(one_value(*zone_candidates))
            band = fields.get("UTMBand")
            if band:
                easting = fields.get("UTMEasting")
                northing = fields.get("UTMNorthing")
                if easting and northing:
                    self.geodetic = Utm(zone=zone, hemisphere=band[0], band=band,
                                        easting=int(easting), northing=int(northing))
                    self._make_coordinate()
        except Exception as err:
            self.parsing_err = str(err)
A general Pattern-based TextMatch. This Python variation consolidates PoliMatch (patterns-of-life = poli) ideas in the Java API.
def normalize(self):
    """
    Read zone, band, easting and northing slots and build the geodetic object and coordinate.
    Nothing is built when the band or either of easting/northing is missing; errors land in parsing_err.
    """
    GeocoordMatch.normalize(self)
    fields = self.attributes()

    # Zone may arrive under any of three slot names (UTMZoneZZ: 0-5\d, UTMZoneZ: \d).
    zone_candidates = (fields.get("UTMZone"), fields.get("UTMZoneZZ"), fields.get("UTMZoneZ"))

    try:
        zone = int(one_value(*zone_candidates))
        band = fields.get("UTMBand")
        if band:
            easting = fields.get("UTMEasting")
            northing = fields.get("UTMNorthing")
            if easting and northing:
                self.geodetic = Utm(zone=zone, hemisphere=band[0], band=band,
                                    easting=int(easting), northing=int(northing))
                self._make_coordinate()
    except Exception as err:
        self.parsing_err = str(err)
Optional, but recommended routine to normalize the matched data. That is, parse fields, uppercase, streamline punctuation, etc. As well, given such normalization result, this is the opportunity to additionally validate the match. :return:
class DegMinMatch(GeocoordMatch):
    """Match for degree-minute (DM) patterns, with optional fractional minutes."""

    def __init__(self, *args, **kwargs):
        GeocoordMatch.__init__(self, *args, **kwargs)

    def validate(self):
        """Invalidate the match when the lat and lon degree-minute separators differ; then set filtered_out."""
        fields = self.attributes()
        if self.is_valid:
            # Punct - separators must match for DM patterns.
            separators = [strip(fields.get(key)) for key in ("dmLatSep", "dmLonSep")]
            if any(separators) and separators[0] != separators[1]:
                self.is_valid = False

        # TODO: evaluate other dashes:  GeocoordNormalization eval dashes, eval punct
        self.filtered_out = not self.is_valid

    def normalize(self):
        """Parse both ordinates from the match slots, build the coordinate and validate."""
        GeocoordMatch.normalize(self)

        # Slot layouts handled here -- DEG MIN (DM) patterns, with fractional min:
        #
        # dmsDegLat > [-\s]? < dmsMinLat > < hemiLat > < fractMinLat > < latlonSep > < dmsDegLon > [-\s]? < dmsMinLon > < hemiLon > < fractMinLon
        #
        # degLat > < dmLatSep >\s? < minLat > < fractMinLat >? < msLatSep >?\s? < hemiLat > < latlonSep3 > < degLon > < dmLonSep >\s? < minLon > < fractMinLon
        #
        # < hemiLatPre >\s? < degLat > < dmLatSep >\s? < minLat > < fractMinLat >? < msLatSep >? < latlonSep3 >
        #      < hemiLonPre >\s? < degLon > < dmLonSep >\s? < minLon > < fractMinLon >? < msLonSep >?

        # TODO: conditions that invalidate this pattern?
        self.lat_ordinate, self.lon_ordinate = (
            DMSOrdinate(axis, self.text, self.pattern_family, slots=self.attributes())
            for axis in ("lat", "lon")
        )
        self._make_coordinate()
        self.validate()
A general Pattern-based TextMatch. This Python variation consolidates PoliMatch (patterns-of-life = poli) ideas in the Java API.
def normalize(self):
    """Parse both ordinates from the match slots, build the coordinate and validate."""
    GeocoordMatch.normalize(self)

    # Slot layouts handled here -- DEG MIN (DM) patterns, with fractional min:
    #
    # dmsDegLat > [-\s]? < dmsMinLat > < hemiLat > < fractMinLat > < latlonSep > < dmsDegLon > [-\s]? < dmsMinLon > < hemiLon > < fractMinLon
    #
    # degLat > < dmLatSep >\s? < minLat > < fractMinLat >? < msLatSep >?\s? < hemiLat > < latlonSep3 > < degLon > < dmLonSep >\s? < minLon > < fractMinLon
    #
    # < hemiLatPre >\s? < degLat > < dmLatSep >\s? < minLat > < fractMinLat >? < msLatSep >? < latlonSep3 >
    #      < hemiLonPre >\s? < degLon > < dmLonSep >\s? < minLon > < fractMinLon >? < msLonSep >?

    # TODO: conditions that invalidate this pattern?
    self.lat_ordinate, self.lon_ordinate = (
        DMSOrdinate(axis, self.text, self.pattern_family, slots=self.attributes())
        for axis in ("lat", "lon")
    )
    self._make_coordinate()
    self.validate()
Optional, but recommended routine to normalize the matched data. That is, parse fields, uppercase, streamline punctuation, etc. As well, given such normalization result, this is the opportunity to additionally validate the match. :return:
class DegMinSecMatch(GeocoordMatch):
    """Match for degree-minute-second (DMS) patterns; screened by the shared DMS filter."""

    def __init__(self, *args, **kwargs):
        GeocoordMatch.__init__(self, *args, **kwargs)
        self.filter = dms_filter

    def normalize(self):
        """Parse both ordinates from the match slots, build the coordinate and validate."""
        GeocoordMatch.normalize(self)
        family = self.pattern_family
        self.lat_ordinate = DMSOrdinate("lat", self.text, family, slots=self.attributes())
        self.lon_ordinate = DMSOrdinate("lon", self.text, family, slots=self.attributes())
        self._make_coordinate()
        self.validate()

    def validate(self):
        """Apply the filter; its (filtered_out, reason) result is stored on the match."""
        verdict = self.filter.filter_out(self)
        self.filtered_out, self.parsing_err = verdict
A general Pattern-based TextMatch. This Python variation consolidates PoliMatch (patterns-of-life = poli) ideas in the Java API.
def normalize(self):
    """Parse both ordinates from the match slots, build the coordinate and validate."""
    GeocoordMatch.normalize(self)
    family = self.pattern_family
    self.lat_ordinate = DMSOrdinate("lat", self.text, family, slots=self.attributes())
    self.lon_ordinate = DMSOrdinate("lon", self.text, family, slots=self.attributes())
    self._make_coordinate()
    self.validate()
Optional, but recommended routine to normalize the matched data. That is, parse fields, uppercase, streamline punctuation, etc. As well, given such normalization result, this is the opportunity to additionally validate the match. :return:
class DecimalDegMatch(GeocoordMatch):
    """Match for decimal-degree patterns."""

    def __init__(self, *args, **kwargs):
        GeocoordMatch.__init__(self, *args, **kwargs)

    def validate(self):
        """
        Validate a parsed coordinate.
        55.60, 80.11  -- not valid
        N55.60, W80.11  -- valid
        +55.60, -80.11  -- valid
        S20 E33 -- not valid, by default looking for sub-degree resolution.

        A match already marked invalid is left untouched.
        """
        if self.is_valid:
            ordinates = (self.lat_ordinate, self.lon_ordinate)
            # Either both hemispheres are letters, or some coordinate symbol appears in the text.
            lettered = all(o.hemi and o.hemi.is_alpha() for o in ordinates)
            symbolic = any(o.has_symbols() for o in ordinates)
            self.is_valid = (lettered or symbolic) and self.filter_by_resolution()
            self.filtered_out = not self.is_valid

    def normalize(self):
        """Parse both ordinates from the match slots, build the coordinate and validate."""
        GeocoordMatch.normalize(self)
        family = self.pattern_family
        self.lat_ordinate = DMSOrdinate("lat", self.text, family, slots=self.attributes())
        self.lon_ordinate = DMSOrdinate("lon", self.text, family, slots=self.attributes())
        self._make_coordinate()
        self.validate()
A general Pattern-based TextMatch. This Python variation consolidates PoliMatch (patterns-of-life = poli) ideas in the Java API.
def validate(self):
    """
    Validate a parsed coordinate.
    55.60, 80.11  -- not valid
    N55.60, W80.11  -- valid
    +55.60, -80.11  -- valid
    S20 E33 -- not valid, by default looking for sub-degree resolution.

    A match already marked invalid is left untouched.
    """
    if self.is_valid:
        ordinates = (self.lat_ordinate, self.lon_ordinate)
        # Either both hemispheres are letters, or some coordinate symbol appears in the text.
        lettered = all(o.hemi and o.hemi.is_alpha() for o in ordinates)
        symbolic = any(o.has_symbols() for o in ordinates)
        self.is_valid = (lettered or symbolic) and self.filter_by_resolution()
        self.filtered_out = not self.is_valid
Validate a parsed coordinate. 55.60, 80.11 -- not valid N55.60, W80.11 -- valid +55.60, -80.11 -- valid S20 E33 -- not valid, by default looking for sub-degree resolution.
Validate also if the coordinate is a valid range for Lat/Lon.
def normalize(self):
    """Parse both ordinates from the match slots, build the coordinate and validate."""
    GeocoordMatch.normalize(self)
    family = self.pattern_family
    self.lat_ordinate = DMSOrdinate("lat", self.text, family, slots=self.attributes())
    self.lon_ordinate = DMSOrdinate("lon", self.text, family, slots=self.attributes())
    self._make_coordinate()
    self.validate()
Optional, but recommended routine to normalize the matched data. That is, parse fields, uppercase, streamline punctuation, etc. As well, given such normalization result, this is the opportunity to additionally validate the match. :return: