opensextant.extractors.xcoord

  1# coding: utf-8
  2
  3import arrow
  4from pygeodesy.mgrs import Mgrs
  5from pygeodesy.utm import Utm
  6
  7from opensextant import Coordinate
  8from opensextant.FlexPat import PatternExtractor, RegexPatternManager, PatternMatch
  9
 10
 11class ResolutionUncertainty:
 12    UNKNOWN = 100000
 13    REGIONAL = 50000
 14    LOCAL = 5000
 15    SITE = 1000
 16    SPOT = 100
 17    GPS = 10
 18
 19
 20class Specificity:
 21    DEG = 1
 22    SUBDEG = 2
 23    MINUTE = 3
 24    SUBMINUTE = 4
 25    SECOND = 5
 26    SUBSECOND = 6
 27
 28
 29HEMISPHERES = {
 30    "-": -1,
 31    "W": -1,
 32    "S": -1,
 33    "+": 1,
 34    "E": 1,
 35    "N": 1,
 36    None: 1
 37}
 38
# Module-wide minimum Specificity an ordinate must reach to pass
# GeocoordMatch.filter_by_resolution(); XCoord.__init__ overwrites this value.
default_specificity = Specificity.SUBDEG
 40
 41
 42# History - 2024 may - MCU ported from XCoord Java
 43#
 44class XCoord(PatternExtractor):
 45    """
 46    NOTE: a port of XCoord java (org.opensextant.extractors.xcoord, in Xponents-Core)
 47    """
 48
 49    def __init__(self, cfg="geocoord_patterns_py.cfg", debug=False, specificity=Specificity.SUBDEG):
 50        """
 51        :param cfg: patterns config file.
 52        :param specificity: the abstract level of resolution for validating coordinates, e.g., DEGREE, SUB-DEGREE, etc.
 53           use Specificity enumeration
 54        """
 55        PatternExtractor.__init__(self, RegexPatternManager(cfg, debug=debug, testing=debug))
 56        global default_specificity
 57        default_specificity = specificity
 58
 59
 60def hemisphere_factor(sym: str) -> int:
 61    if sym:
 62        return HEMISPHERES.get(sym.upper())
 63    return HEMISPHERES.get(None)
 64
 65
 66def one_value(*args):
 67    """
 68    :param args:
 69    :return: first non-null value.
 70    """
 71    for val in args:
 72        if val is not None:
 73            return val
 74    return None
 75
 76
 77def is_blank(txt: str):
 78    if txt is None:
 79        # Sorry -- you have to determine if obj is string or not first. None does not count.
 80        return False
 81    return txt == '' or txt.strip() == ''
 82
 83
 84def strip(txt: str):
 85    if txt is None:
 86        # Sorry -- you have to determine if obj is string or not first. None does not count.
 87        return False
 88    return txt.strip()
 89
 90
 91class Hemisphere:
 92    def __init__(self, axis, slots=None):
 93        self.axis = axis
 94        self.symbol = None
 95        self.polarity = 0
 96        self.slots = slots
 97        self.normalize()
 98
 99    def is_alpha(self) -> bool:
100        return self.symbol and self.symbol.isalpha()
101
102    def standard_format(self) -> str:
103        """
104        Caution -- test for  presence of symbol first, as decimal value without hemisphere may not be geo coord at all.
105        """
106        if self.polarity >= 0:
107            return "+"
108        if self.polarity < 0:
109            return "-"
110
111    def normalize(self):
112        if not self.slots:
113            return
114        if self.axis == "lon":
115            for slot in ["hemiLon", "hemiLonSign", "hemiLonPre"]:
116                if slot in self.slots:
117                    self.symbol = self.slots.get(slot)
118                    if not self.symbol:
119                        self.polarity = 1
120                        return
121
122        if self.axis == "lat":
123            for slot in ["hemiLat", "hemiLatSign", "hemiLatPre"]:
124                if slot in self.slots:
125                    self.symbol = self.slots.get(slot)
126                    if not self.symbol:
127                        self.polarity = 1
128                        return
129
130        if self.symbol:
131            self.symbol = self.symbol.upper().strip()
132            self.polarity = hemisphere_factor(self.symbol)
133
134
class DMSOrdinate:
    """
    One axis (latitude or longitude) of a degree / minute / second style
    coordinate, parsed from the named slots of a pattern match.
    """
    SYMBOLS = {"°", "º", "'", "\"", ":", "lat", "lon", "geo", "coord", "deg"}

    def __init__(self, axis: str, text: str, fam: str, slots=None):
        """
        :param axis: "lat" or "lon".
        :param text: the matched text.
        :param fam: pattern family of the match, e.g. "DMS".
        :param slots: dict of named pattern slots; parsing is skipped when empty or None.
        """
        self.axis = axis
        self.text = text
        self.pattern_family = fam
        self.slots = slots
        self.degrees = None
        self.min = None
        self.seconds = None
        self.hemi = None
        self.symbols = set()
        self.normalized_slots = dict()
        self.resolution = ResolutionUncertainty.UNKNOWN
        self.specificity = Specificity.DEG
        self.normalize()

    def is_valid(self):
        """
        :return: True if degrees were parsed and are inside the open range for the
           axis (lat: -90..90, lon: -180..180) and any minutes/seconds are in [0, 60).
        """
        if self.degrees is None:
            return False
        # Must have degrees, in range for the axis
        if self.axis == "lat":
            if not -90 < self.degrees < 90:
                return False
        if self.axis == "lon":
            if not -180 < self.degrees < 180:
                return False
        # Min and Secs must be in range if specified
        if self.min is not None and not 0 <= self.min < 60:
            return False
        if self.seconds is not None and not 0 <= self.seconds < 60:
            return False

        return True

    def has_minutes(self) -> bool:
        """
        :return: True if minutes were parsed (a value of 0 counts) and the ordinate
           resolves to minute or sub-minute specificity.
        """
        return self.min is not None and self.specificity in (Specificity.MINUTE, Specificity.SUBMINUTE)

    def has_submin(self):
        """:return: True if fractional minutes were parsed."""
        return self.specificity == Specificity.SUBMINUTE

    def has_seconds(self) -> bool:
        """
        :return: True if seconds were parsed (a value of 0 counts) and the ordinate
           resolves to second or sub-second specificity.
        """
        return self.seconds is not None and self.specificity in (Specificity.SECOND, Specificity.SUBSECOND)

    def has_subsec(self):
        """:return: True if fractional seconds were parsed."""
        return self.specificity == Specificity.SUBSECOND

    def has_symbols(self):
        """:return: True if any of SYMBOLS occurs in the matched text."""
        return len(self.symbols) > 0

    def normalize(self):
        """
        Parse all slots for the pattern, normalizing found items as both
        string and numeric representation.  That is, the string portion of the value should be preserved
        to avoid inserting additional precision not present in the value.  e.g., "30.44"  is 2-sig-figs, and not
        "30.4400001" or whatever artifacts come with floating point computation.

        separators and symbols present are useful in post-match processing/filtering to weed out false positives.
        """
        if not self.slots:
            return
        txtnorm = self.text.lower()
        self.symbols.update(sym for sym in DMSOrdinate.SYMBOLS if sym in txtnorm)

        self.hemi = Hemisphere(self.axis, slots=self.slots)
        if self.axis == "lat":
            self.digest_lat()
        elif self.axis == "lon":
            self.digest_lon()

    def decimal(self):
        """
        Convert the parsed parts to signed decimal degrees.

        :return: decimal degrees, or None if no degrees were parsed.
        :raise Exception: if a hemisphere object exists but its polarity is unresolved.
        """
        pol = 1
        if self.hemi:
            # Validity check of presence of Hemisphere symbol is separate.
            pol = self.hemi.polarity
            if not pol:
                raise Exception("logic error - hemisphere was not resolved")

        if self.seconds is not None and self.min is not None and self.degrees is not None:
            if self.seconds < 60:
                return pol * (self.degrees + self.min / 60 + self.seconds / 3600)
        # Out-of-range seconds fall through to the coarser minute/degree forms.
        if self.min is not None and self.degrees is not None:
            if self.min < 60:
                return pol * (self.degrees + self.min / 60)
        if self.degrees is not None:
            return pol * self.degrees
        return None

    def digest_lat(self):
        """Parse the latitude slots."""
        self._digest_slots("Lat")

    def digest_lon(self):
        """Parse the longitude slots."""
        self._digest_slots("Lon")

    def _digest_slots(self, axis):
        """
        Fields or slots are named xxxLatxx or xxxLonxx

        Fills degrees, then minutes, then seconds, stopping at the first level
        that is missing, and raises ``specificity`` as finer parts are found.

        :param axis: slot-name infix, "Lat" or "Lon".
        """
        if self.pattern_family == "DMS":
            min_sec_sep = self.slots.get(f"ms{axis}Sep")
            deg_min_sep = self.slots.get(f"dm{axis}Sep")
            if min_sec_sep and deg_min_sep and min_sec_sep == "." and min_sec_sep != deg_min_sep:
                # valid coordinate, but separators like "DD MM.ss" suggest more DM pattern
                #                      whereas          "DD.MM.SS" with consistent separators is DMS.
                return

        # DEGREES
        deg = self.get_int(f"deg{axis}", "deg")
        deg2 = self.get_int(f"dmsDeg{axis}", "deg")
        deg3 = self.get_decimal(f"decDeg{axis}", "deg")
        self.degrees = one_value(deg, deg2, deg3)
        if self.degrees is None:
            return
        self.specificity = Specificity.SUBDEG if deg3 is not None else Specificity.DEG

        #  MINUTES
        minutes = self.get_int(f"min{axis}", "min")
        minutes2 = self.get_int(f"dmsMin{axis}", "min")
        minutes3 = self.get_decimal(f"decMin{axis}", "min")
        mindash = self.get_decimal(f"decMin{axis}3", "min")

        self.min = one_value(minutes, minutes2, minutes3, mindash)
        if self.min is None:
            return
        self.specificity = Specificity.MINUTE

        min_fract = self.get_fractional(f"fractMin{axis}", "fmin")
        # variation 2, is a 3-digit or longer fraction
        min_fract2 = self.get_fractional(f"fractMin{axis}3", "fmin")
        fmin = one_value(min_fract, min_fract2)
        if fmin is not None:
            self.specificity = Specificity.SUBMINUTE
            self.min += fmin

        # SECONDS
        sec = self.get_int(f"sec{axis}", "sec")
        sec2 = self.get_int(f"dmsSec{axis}", "sec")
        self.seconds = one_value(sec, sec2)
        if self.seconds is None:
            return
        self.specificity = Specificity.SECOND

        fsec = self.get_fractional(f"fractSec{axis}", "fsec")
        fsec2 = self.get_fractional(f"fractSec{axis}Opt", "fsec")
        fseconds = one_value(fsec, fsec2)
        if fseconds is not None:
            self.specificity = Specificity.SUBSECOND
            self.seconds += fseconds

    def get_int(self, f, fnorm):
        """
        Read slot ``f`` as an integer and record its text under ``fnorm``.

        :param f: slot name.
        :param fnorm: key used in ``normalized_slots``.
        :return: int value, or None if the slot is absent, None or empty -- so that
           one_value() can fall back to an alternative slot.
        """
        val = self.slots.get(f)
        if val is None or val == "":
            return None
        self.normalized_slots[fnorm] = val
        return int(val)

    def get_decimal(self, f, fnorm):
        """
        Read slot ``f`` as a decimal number, treating a dash as the decimal
        point ("30-5" is read as 30.5), and record the text under ``fnorm``.

        :param f: slot name.
        :param fnorm: key used in ``normalized_slots``.
        :return: float value, or None if the slot is absent, None or empty.
        """
        val = self.slots.get(f)
        if val is None or val == "":
            return None
        if "-" in val:
            val = val.replace("-", ".")
        self.normalized_slots[fnorm] = val
        return float(val)

    def get_fractional(self, f, fnorm):
        """
        find slot and convert pattern "-dddd..." to 0.dddd...
        Also if fraction is simply "dddd..." then insert "." at front.

        :param f: slot name.
        :param fnorm: key used in ``normalized_slots``.
        :return: fraction as float, or None if the slot is absent, None or empty.
        """
        val = self.slots.get(f)
        if not val:
            return None
        if val.startswith("-"):
            val = val.replace("-", ".")
        elif not val.startswith("."):
            val = f".{val}"
        self.normalized_slots[fnorm] = val
        return float(val)
326
327
class GeocoordMatch(PatternMatch):
    """
    Base class for coordinate matches: holds the parsed ordinates or geodetic
    object, the resulting Coordinate and the validity / filter state.
    """

    def __init__(self, *args, **kwargs):
        PatternMatch.__init__(self, *args, **kwargs)
        self.case = PatternMatch.UPPER_CASE
        self.geodetic = None
        self.coordinate: Coordinate = None
        self.parsing_err: str = None
        self.lat_ordinate: DMSOrdinate = None
        self.lon_ordinate: DMSOrdinate = None
        self.filter: GeocoordFilter = None
        # Pattern IDs look like "<family>-<variant>"; keep the family part.
        self.pattern_family = self.pattern_id.split("-", 1)[0]

    def __str__(self):
        return f"{self.text}"

    def normalize(self):
        """
        Run the base normalization, then start from "invalid and filtered out";
        subclasses flip these flags once parsing and validation succeed.
        """
        PatternMatch.normalize(self)
        self.is_valid = False
        self.filtered_out = True

    def _make_coordinate(self):
        """
        Build ``self.coordinate`` from the lat/lon ordinates, or else from the
        geodetic (MGRS/UTM) object, and set ``is_valid`` accordingly.

        Any error raised by the geodetic conversion propagates to the caller,
        leaving the flags untouched.
        """
        if self.lat_ordinate and self.lon_ordinate:
            # Both axes must validate -- latitude as well as longitude.
            self.is_valid = self.lat_ordinate.is_valid() and self.lon_ordinate.is_valid()
            if self.is_valid:
                # continue to weed out noise.
                self.coordinate = Coordinate(None,
                                             lat=self.lat_ordinate.decimal(),
                                             lon=self.lon_ordinate.decimal())
                self.is_valid = self.coordinate.validate()
        elif self.geodetic:
            # Convert first: only a successful conversion may mark the match valid.
            LL = self.geodetic.toLatLon()
            self.coordinate = Coordinate(None, lat=LL.lat, lon=LL.lon)
            # These are parsed by UTM and MGRS libraries, so coordinate is assumed valid.
            self.is_valid = True
            self.filtered_out = False

    def filter_by_resolution(self):
        """ Check specificity of ordinates -- as parsed, if LAT or LON has the minimum level of detail

                40N      -- could be "40 North"
               +40.0000  -- also "40 North", but precision is specified to 4sigfig.
               +40:00:00 -- well, could also be an hour marker ~ 40 hours

        :return: TRUE if coordinate is specific and resolution is high enough.
        """
        if not self.lat_ordinate or not self.lon_ordinate:
            # If unset, we'll simply filter OUT
            return False
        lat_valid = self.lat_ordinate.specificity >= default_specificity
        lon_valid = self.lon_ordinate.specificity >= default_specificity
        return lat_valid and lon_valid
379
380
class GeocoordFilter:
    """Base class for false-positive filters applied to coordinate matches."""

    def filter_out(self, m: GeocoordMatch) -> tuple:
        """
        :param m: the match to test.
        :return: tuple (filtered_out, reason); this base implementation never filters.
        """
        verdict = (False, "reason")
        return verdict
384
385
class MGRSFilter(GeocoordFilter):
    """Lexical and date-based filter for text that only looks like MGRS."""

    def __init__(self):
        GeocoordFilter.__init__(self)
        self.date_formats = ["DDMMMYYYY", "DMMMYYHHmm", "DDMMMYYHHmm", "DDMMMYY", "DMMMYY", "HHZZZYYYY"]
        self.sequences = ["1234", "123456", "12345678", "1234567890"]
        self.stop_terms = {"PER", "SEC", "UTC", "GMT", "GAL", "USC", "CAN",
                           "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
                           "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"}
        # "Now" is captured once, when the filter is created.
        self.today = arrow.utcnow()
        self.YEAR = self.today.date().year
        self.YY = self.YEAR - 2000
        self.RECENT_YEAR_THRESHOLD = 30

    def filter_out(self, mgrs: GeocoordMatch) -> tuple:
        """
        MGRS rules: upper case alphanumeric, greater than 6 chars, and subjectively
        not a digit sequence, not a recent date, not a rate ('NNN per LB'),
        not a time with 'sec'.

        :param mgrs: the match to test; ``text``, ``textnorm`` and ``is_valid`` are read.
        :return: tuple (True, reason) if filtered out as a false positive, else (False, None).
        """
        if not mgrs.is_valid:
            # parsed earlier as invalid.
            return True, "invalid"

        # Lexical filters
        if not mgrs.text.isupper() or len(mgrs.text.replace(" ", "")) <= 6:
            return True, "lexical"
        if any(ws in mgrs.text for ws in ("\t", "\n")):
            return True, "format-ws"
        if any(term in mgrs.textnorm for term in self.stop_terms):
            return True, "measure"
        if any(seq in mgrs.textnorm for seq in self.sequences):
            return True, "digit-seq"

        # Date filter: try each format against a prefix of the same length.
        for fmt in self.date_formats:
            candidate = mgrs.textnorm[:len(fmt)]
            try:
                if self._is_recent(arrow.get(candidate, fmt)):
                    return True, "date"
            except Exception:
                # Not parseable with this format; try the next one.
                continue

        # Not filtered out
        return False, None

    def _is_recent(self, dt: arrow):
        """
        checks if a year slot represents a recent YYYY or YY year.

        :return: True if the year of ``dt`` is within RECENT_YEAR_THRESHOLD years of
           the year captured when this filter was created.
        """
        year_gap = abs(dt.date().year - self.YEAR)
        return year_gap <= self.RECENT_YEAR_THRESHOLD
444
445
class DMSFilter(GeocoordFilter):
    """Filter for DMS-like matches that lack resolution or are really date/time stamps."""

    def __init__(self):
        GeocoordFilter.__init__(self)
        self.date_formats = ["YY-DD-MM HH:mm:ss", "MM-DD-YY HH:mm:ss"]

    def filter_out(self, dms: GeocoordMatch) -> tuple:
        """
        Easy filter -- if punctuation matches, this is an easy pattern to ignore.

        :param dms: the match to test.
        :return: tuple (filtered_out, reason): True if filtered out as a false
           positive.  Always a 2-tuple, since callers unpack the result.
        """
        if not dms.is_valid:
            # Filter out. invalid.
            return True, "invalid"

        if not dms.filter_by_resolution():
            # Not meeting the user's level of specificity.
            return True, "resolution"

        if dms.text[0].isalpha():
            # Leading letter: not tested against the date formats.
            return False, None

        for fmt in self.date_formats:
            try:
                arrow.get(dms.text, fmt)
            except Exception:
                # Does not parse as this date format; try the next one.
                continue
            # Recency matters not.  Tests are literal date formats
            return True, "date"

        # Not filtered. Is valid.
        return False, None
475
476
# Filter instances created once at import time; MGRSMatch and DegMinSecMatch
# assign them to ``self.filter`` in their constructors.
mgrs_filter = MGRSFilter()
dms_filter = DMSFilter()
479
480
class MGRSMatch(GeocoordMatch):
    """Match for an MGRS grid reference."""

    def __init__(self, *args, **kwargs):
        GeocoordMatch.__init__(self, *args, **kwargs)
        self.filter = mgrs_filter

    def validate(self):
        """Apply the MGRS filter, storing its verdict and reason on the match."""
        self.filtered_out, self.parsing_err = self.filter.filter_out(self)

    def normalize(self):
        """
        Parse zone, 100km square and easting/northing, build the Mgrs object and
        coordinate, then filter.  On any parsing or conversion error the match
        stays invalid and filtered out, with the error text in ``parsing_err``.
        """
        GeocoordMatch.normalize(self)
        slots = self.attributes()
        self.textnorm = self.textnorm.replace(" ", "")

        z = slots.get("MGRSZone")
        q = slots.get("MGRSQuad")
        east_north = slots.get("Easting_Northing")
        if not east_north:
            # Nothing to parse; the match stays invalid and filtered out.
            self.parsing_err = "missing easting/northing"
            return

        e, n = None, None
        if " " in east_north:
            e, n = east_north.split(" ", 1)
            # Unequal halves: truncate both to the shorter one.
            resolution = min(len(e), len(n))
            e, n = e[:resolution], n[:resolution]
        elif len(east_north) % 2 == 0:
            resolution = len(east_north) // 2
            e, n = east_north[:resolution], east_north[resolution:]

        if e and n:
            try:
                # NOTE(review): the digits are passed as plain integers; confirm whether
                # Mgrs expects meters, i.e. whether shorter digit groups need scaling.
                self.geodetic = Mgrs(zone=z, EN=q, easting=int(e), northing=int(n))
                self._make_coordinate()
                self.validate()
            except Exception as err:
                # A failed parse or conversion must not leave the match looking valid.
                self.is_valid = False
                self.filtered_out = True
                self.coordinate = None
                self.parsing_err = str(err)
520
521
class UTMMatch(GeocoordMatch):
    """Match for a UTM coordinate."""

    def __init__(self, *args, **kwargs):
        GeocoordMatch.__init__(self, *args, **kwargs)

    def normalize(self):
        """
        Parse zone, band, easting and northing, then build the Utm object and
        coordinate.  On any parsing or conversion error the match stays invalid
        and filtered out, with the error text in ``parsing_err``.
        """
        GeocoordMatch.normalize(self)
        slots = self.attributes()

        z = slots.get("UTMZone")
        z1 = slots.get("UTMZoneZZ")  # 0-5\d
        z2 = slots.get("UTMZoneZ")  # \d

        try:
            ZZ = int(one_value(z, z1, z2))
            band = slots.get("UTMBand")
            if not band:
                return

            # NOTE(review): the first band character is passed as the hemisphere --
            # confirm the pattern restricts the band so that this is meaningful.
            hemi = band[0]
            e = slots.get("UTMEasting")
            n = slots.get("UTMNorthing")
            if e and n:
                self.geodetic = Utm(zone=ZZ, hemisphere=hemi, band=band, easting=int(e), northing=int(n))
                self._make_coordinate()
        except Exception as err:
            # A failed parse or conversion must not leave the match looking valid.
            self.is_valid = False
            self.filtered_out = True
            self.coordinate = None
            self.parsing_err = str(err)
548
549
class DegMinMatch(GeocoordMatch):
    """Match for degree-minute (DM) patterns, optionally with fractional minutes."""

    def __init__(self, *args, **kwargs):
        GeocoordMatch.__init__(self, *args, **kwargs)

    def validate(self):
        """
        Invalidate the match when the degree/minute separators of latitude and
        longitude differ, then set ``filtered_out`` to the opposite of ``is_valid``.
        """
        slots = self.attributes()
        if self.is_valid:
            # Punct - separators must match for DM  patterns.
            lat_sep, lon_sep = (strip(slots.get(key)) for key in ("dmLatSep", "dmLonSep"))
            if lat_sep != lon_sep and (lat_sep or lon_sep):
                self.is_valid = False

        # TODO: evaluate other dashes: GeocoordNormalization eval dashes, eval punct
        self.filtered_out = not self.is_valid

    def normalize(self):
        """
        Parse both ordinates from the match slots, build the coordinate and validate.

        DEG MIN (DM) patterns, with fractional min:

          dmsDegLat > [-\\s]? < dmsMinLat > < hemiLat > < fractMinLat > < latlonSep > < dmsDegLon > [-\\s]? < dmsMinLon > < hemiLon > < fractMinLon

          degLat > < dmLatSep >\\s? < minLat > < fractMinLat >? < msLatSep >?\\s? < hemiLat > < latlonSep3 > < degLon > < dmLonSep >\\s? < minLon > < fractMinLon

          < hemiLatPre >\\s? < degLat > < dmLatSep >\\s? < minLat > < fractMinLat >? < msLatSep >? < latlonSep3 >
              < hemiLonPre >\\s? < degLon > < dmLonSep >\\s? < minLon > < fractMinLon >? < msLonSep >?
        """
        GeocoordMatch.normalize(self)

        # TODO: conditions that invalidate this pattern?
        for axis in ("lat", "lon"):
            ordinate = DMSOrdinate(axis, self.text, self.pattern_family, slots=self.attributes())
            setattr(self, f"{axis}_ordinate", ordinate)
        self._make_coordinate()
        self.validate()
584
585
class DegMinSecMatch(GeocoordMatch):
    """Match for degree-minute-second (DMS) patterns, filtered by the shared DMS filter."""

    def __init__(self, *args, **kwargs):
        GeocoordMatch.__init__(self, *args, **kwargs)
        self.filter = dms_filter

    def validate(self):
        """Apply the DMS filter, storing its verdict and reason on the match."""
        verdict, reason = self.filter.filter_out(self)
        self.filtered_out = verdict
        self.parsing_err = reason

    def normalize(self):
        """Parse both ordinates from the match slots, build the coordinate and validate."""
        GeocoordMatch.normalize(self)
        for axis in ("lat", "lon"):
            ordinate = DMSOrdinate(axis, self.text, self.pattern_family, slots=self.attributes())
            setattr(self, f"{axis}_ordinate", ordinate)
        self._make_coordinate()
        self.validate()
600
601
class DecimalDegMatch(GeocoordMatch):
    """Match for decimal-degree patterns."""

    def __init__(self, *args, **kwargs):
        GeocoordMatch.__init__(self, *args, **kwargs)

    def validate(self):
        """
        Validate a parsed coordinate.
             55.60, 80.11  -- not valid
             N55.60, W80.11  -- valid
             +55.60, -80.11  -- valid
             S20 E33         -- not valid, by default looking for sub-degree resolution.

        Validate also if the coordinate is a valid range for Lat/Lon.

        A match already marked invalid is left untouched.
        """
        if self.is_valid:
            lat_hemi = self.lat_ordinate.hemi
            lon_hemi = self.lon_ordinate.hemi
            # Either both hemispheres are letters, or some coordinate symbol is present.
            lettered = lat_hemi and lon_hemi and lat_hemi.is_alpha() and lon_hemi.is_alpha()
            marked = self.lat_ordinate.has_symbols() or self.lon_ordinate.has_symbols()
            self.is_valid = (lettered or marked) and self.filter_by_resolution()

            self.filtered_out = not self.is_valid

    def normalize(self):
        """Parse both ordinates from the match slots, build the coordinate and validate."""
        GeocoordMatch.normalize(self)
        for axis in ("lat", "lon"):
            ordinate = DMSOrdinate(axis, self.text, self.pattern_family, slots=self.attributes())
            setattr(self, f"{axis}_ordinate", ordinate)
        self._make_coordinate()
        self.validate()
class XCoord(opensextant.FlexPat.PatternExtractor):
class XCoord(PatternExtractor):
    """
    Pattern-based geographic coordinate extractor.

    NOTE: a port of XCoord java (org.opensextant.extractors.xcoord, in Xponents-Core)
    """

    def __init__(self, cfg="geocoord_patterns_py.cfg", debug=False, specificity=Specificity.SUBDEG):
        """
        :param cfg: patterns config file.
        :param debug: passed to the pattern manager as both its ``debug`` and ``testing`` flags.
        :param specificity: the abstract level of resolution for validating coordinates,
           e.g., DEGREE, SUB-DEGREE, etc.  Use the Specificity enumeration.
           This value is stored in the module-level ``default_specificity``, so it
           applies to every match validated afterwards, not just to this instance.
        """
        global default_specificity
        pattern_mgr = RegexPatternManager(cfg, debug=debug, testing=debug)
        PatternExtractor.__init__(self, pattern_mgr)
        default_specificity = specificity

NOTE: a port of XCoord java (org.opensextant.extractors.xcoord, in Xponents-Core)

XCoord(cfg='geocoord_patterns_py.cfg', debug=False, specificity=2)
50    def __init__(self, cfg="geocoord_patterns_py.cfg", debug=False, specificity=Specificity.SUBDEG):
51        """
52        :param cfg: patterns config file.
53        :param specificity: the abstract level of resolution for validating coordinates, e.g., DEGREE, SUB-DEGREE, etc.
54           use Specificity enumeration
55        """
56        PatternExtractor.__init__(self, RegexPatternManager(cfg, debug=debug, testing=debug))
57        global default_specificity
58        default_specificity = specificity

:param cfg: patterns config file. :param specificity: the abstract level of resolution for validating coordinates, e.g., DEGREE, SUB-DEGREE, etc. use Specificity enumeration

def one_value(*args):
def one_value(*args):
    """
    Pick the first argument that is not None.

    Falsy values such as 0 or "" count as real values.

    :param args: candidate values, in priority order.
    :return: first non-null value, or None if there is none.
    """
    return next((candidate for candidate in args if candidate is not None), None)

:param args: :return: first non-null value.

class GeocoordMatch(opensextant.FlexPat.PatternMatch):
class GeocoordMatch(PatternMatch):
    """
    Base class for coordinate matches: holds the parsed ordinates or geodetic
    object, the resulting Coordinate and the validity / filter state.
    """

    def __init__(self, *args, **kwargs):
        PatternMatch.__init__(self, *args, **kwargs)
        self.case = PatternMatch.UPPER_CASE
        self.geodetic = None
        self.coordinate: Coordinate = None
        self.parsing_err: str = None
        self.lat_ordinate: DMSOrdinate = None
        self.lon_ordinate: DMSOrdinate = None
        self.filter: GeocoordFilter = None
        # Pattern IDs look like "<family>-<variant>"; keep the family part.
        self.pattern_family = self.pattern_id.split("-", 1)[0]

    def __str__(self):
        return f"{self.text}"

    def normalize(self):
        """
        Run the base normalization, then start from "invalid and filtered out";
        subclasses flip these flags once parsing and validation succeed.
        """
        PatternMatch.normalize(self)
        self.is_valid = False
        self.filtered_out = True

    def _make_coordinate(self):
        """
        Build ``self.coordinate`` from the lat/lon ordinates, or else from the
        geodetic (MGRS/UTM) object, and set ``is_valid`` accordingly.

        Any error raised by the geodetic conversion propagates to the caller,
        leaving the flags untouched.
        """
        if self.lat_ordinate and self.lon_ordinate:
            # Both axes must validate -- latitude as well as longitude.
            self.is_valid = self.lat_ordinate.is_valid() and self.lon_ordinate.is_valid()
            if self.is_valid:
                # continue to weed out noise.
                self.coordinate = Coordinate(None,
                                             lat=self.lat_ordinate.decimal(),
                                             lon=self.lon_ordinate.decimal())
                self.is_valid = self.coordinate.validate()
        elif self.geodetic:
            # Convert first: only a successful conversion may mark the match valid.
            LL = self.geodetic.toLatLon()
            self.coordinate = Coordinate(None, lat=LL.lat, lon=LL.lon)
            # These are parsed by UTM and MGRS libraries, so coordinate is assumed valid.
            self.is_valid = True
            self.filtered_out = False

    def filter_by_resolution(self):
        """ Check specificity of ordinates -- as parsed, if LAT or LON has the minimum level of detail

                40N      -- could be "40 North"
               +40.0000  -- also "40 North", but precision is specified to 4sigfig.
               +40:00:00 -- well, could also be an hour marker ~ 40 hours

        :return: TRUE if coordinate is specific and resolution is high enough.
        """
        if not self.lat_ordinate or not self.lon_ordinate:
            # If unset, we'll simply filter OUT
            return False
        lat_valid = self.lat_ordinate.specificity >= default_specificity
        lon_valid = self.lon_ordinate.specificity >= default_specificity
        return lat_valid and lon_valid

A general Pattern-based TextMatch. This Python variation consolidates PoliMatch (patterns-of-life = poli) ideas in the Java API.

def normalize(self):
344    def normalize(self):
345        PatternMatch.normalize(self)
346        self.is_valid = False
347        self.filtered_out = True

Optional, but recommended routine to normalize the matched data. That is, parse fields, uppercase, streamline punctuation, etc. As well, given such normalization result, this is the opportunity to additionally validate the match. :return:

def filter_by_resolution(self):
365    def filter_by_resolution(self):
366        """ Check specificity of ordinates -- as parsed, if LAT or LON has the minimum level of detail
367
368                40N      -- could be "40 North"
369               +40.0000  -- also "40 North", but precision is specified to 4sigfig.
370               +40:00:00 -- well, could also be an hour marker ~ 40 hours
371
372        :return: TRUE if coordinate is specific and resolution is high enough.
373        """
374        if not self.lat_ordinate or not self.lon_ordinate:
375            # If unset, we'll simply filter OUT
376            return False
377        lat_valid = self.lat_ordinate.specificity >= default_specificity
378        lon_valid = self.lon_ordinate.specificity >= default_specificity
379        return lat_valid and lon_valid

Check specificity of ordinates -- as parsed, if LAT or LON has the minimum level of detail

    40N      -- could be "40 North"
   +40.0000  -- also "40 North", but precision is specified to 4sigfig.
   +40:00:00 -- well, could also be an hour marker ~ 40 hours

:return: TRUE if coordinate is specific and resolution is high enough.

class MGRSMatch(GeocoordMatch):
class MGRSMatch(GeocoordMatch):
    """Match for an MGRS grid reference."""

    def __init__(self, *args, **kwargs):
        GeocoordMatch.__init__(self, *args, **kwargs)
        self.filter = mgrs_filter

    def validate(self):
        """Apply the MGRS filter, storing its verdict and reason on the match."""
        self.filtered_out, self.parsing_err = self.filter.filter_out(self)

    def normalize(self):
        """
        Parse zone, 100km square and easting/northing, build the Mgrs object and
        coordinate, then filter.  On any parsing or conversion error the match
        stays invalid and filtered out, with the error text in ``parsing_err``.
        """
        GeocoordMatch.normalize(self)
        slots = self.attributes()
        self.textnorm = self.textnorm.replace(" ", "")

        z = slots.get("MGRSZone")
        q = slots.get("MGRSQuad")
        east_north = slots.get("Easting_Northing")
        if not east_north:
            # Nothing to parse; the match stays invalid and filtered out.
            self.parsing_err = "missing easting/northing"
            return

        e, n = None, None
        if " " in east_north:
            e, n = east_north.split(" ", 1)
            # Unequal halves: truncate both to the shorter one.
            resolution = min(len(e), len(n))
            e, n = e[:resolution], n[:resolution]
        elif len(east_north) % 2 == 0:
            resolution = len(east_north) // 2
            e, n = east_north[:resolution], east_north[resolution:]

        if e and n:
            try:
                # NOTE(review): the digits are passed as plain integers; confirm whether
                # Mgrs expects meters, i.e. whether shorter digit groups need scaling.
                self.geodetic = Mgrs(zone=z, EN=q, easting=int(e), northing=int(n))
                self._make_coordinate()
                self.validate()
            except Exception as err:
                # A failed parse or conversion must not leave the match looking valid.
                self.is_valid = False
                self.filtered_out = True
                self.coordinate = None
                self.parsing_err = str(err)

A general Pattern-based TextMatch. This Python variation consolidates PoliMatch (patterns-of-life = poli) ideas in the Java API.

def normalize(self):
490    def normalize(self):
491        GeocoordMatch.normalize(self)
492        slots = self.attributes()
493        self.textnorm = self.textnorm.replace(" ", "")
494
495        z = slots.get("MGRSZone")
496        q = slots.get("MGRSQuad")
497        east_north = slots.get("Easting_Northing")
498
499        e, n = None, None
500        if " " in east_north:
501            e, n = east_north.split(" ", 1)
502            le = len(e)
503            ln = len(n)
504            if le != ln:
505                resolution = min(le, ln)
506                e = e[:resolution]
507                n = n[:resolution]
508        elif len(east_north) % 2 == 0:
509            resolution = int(len(east_north) / 2)
510            e, n = east_north[0:resolution], east_north[resolution:]
511
512        if e and n:
513            try:
514                e = int(e)
515                n = int(n)
516                self.geodetic = Mgrs(zone=z, EN=q, easting=e, northing=n)
517                self._make_coordinate()
518                self.validate()
519            except Exception as err:
520                self.parsing_err = str(err)

Optional, but recommended routine to normalize the matched data. That is, parse fields, uppercase, streamline punctuation, etc. As well, given such normalization result, this is the opportunity to additionally validate the match. :return:

class UTMMatch(GeocoordMatch):
class UTMMatch(GeocoordMatch):
    """
    Coordinate match for UTM patterns: zone, band (or hemisphere) letter, easting
    and northing.
    """

    def __init__(self, *args, **kwargs):
        GeocoordMatch.__init__(self, *args, **kwargs)

    def normalize(self):
        """
        Parse the matched UTM slots into a geodetic ``Utm`` object and build the
        coordinate.

        The zone is taken from the first non-null of ``UTMZone``, ``UTMZoneZZ`` and
        ``UTMZoneZ``.  Nothing is produced when ``UTMBand`` is absent or when the
        easting/northing slots are empty.  Any parsing failure is recorded in
        ``self.parsing_err`` rather than raised.
        """
        GeocoordMatch.normalize(self)
        slots = self.attributes()

        z = slots.get("UTMZone")
        z1 = slots.get("UTMZoneZZ")  # 0-5\d
        z2 = slots.get("UTMZoneZ")  # \d

        try:
            ZZ = int(one_value(z, z1, z2))
            band = slots.get("UTMBand")
            if not band:
                return

            # Utm accepts only N or S as hemisphere.  A literal N/S is kept as given;
            # any other latitude band letter is mapped to its hemisphere
            # (bands before N are southern, bands after N are northern).
            hemi = band[0].upper()
            if hemi not in ("N", "S"):
                hemi = "N" if hemi > "N" else "S"

            e = slots.get("UTMEasting")
            n = slots.get("UTMNorthing")
            if e and n:
                self.geodetic = Utm(zone=ZZ, hemisphere=hemi, band=band, easting=int(e), northing=int(n))
                self._make_coordinate()
        except Exception as err:
            self.parsing_err = str(err)

A general Pattern-based TextMatch. This Python variation consolidates PoliMatch (patterns-of-life = poli) ideas in the Java API.

def normalize(self):
527    def normalize(self):
528        GeocoordMatch.normalize(self)
529        slots = self.attributes()
530
531        z = slots.get("UTMZone")
532        z1 = slots.get("UTMZoneZZ")  # 0-5\d
533        z2 = slots.get("UTMZoneZ")  # \d
534
535        try:
536            ZZ = int(one_value(z, z1, z2))
537            band = slots.get("UTMBand")
538            if not band:
539                return
540
541            hemi = band[0]
542            e = slots.get("UTMEasting")
543            n = slots.get("UTMNorthing")
544            if e and n:
545                self.geodetic = Utm(zone=ZZ, hemisphere=hemi, band=band, easting=int(e), northing=int(n))
546                self._make_coordinate()
547        except Exception as err:
548            self.parsing_err = str(err)

Optional, but recommended routine to normalize the matched data. That is, parse fields, uppercase, streamline punctuation, etc. As well, given such normalization result, this is the opportunity to additionally validate the match. :return:

class DegMinMatch(GeocoordMatch):
class DegMinMatch(GeocoordMatch):
    """
    Coordinate match for degree-minute (DM) patterns, optionally with fractional minutes.
    """

    def __init__(self, *args, **kwargs):
        GeocoordMatch.__init__(self, *args, **kwargs)

    def validate(self):
        """
        Reject the match when the latitude and longitude degree/minute separators
        disagree, then mirror the outcome into ``filtered_out``.
        """
        fields = self.attributes()
        if self.is_valid:
            # Punctuation: the separators on both ordinates must match for DM patterns.
            lat_punct = strip(fields.get("dmLatSep"))
            lon_punct = strip(fields.get("dmLonSep"))
            has_punct = lat_punct or lon_punct
            if has_punct and lat_punct != lon_punct:
                self.is_valid = False

        # TODO: evaluate other dashes: GeocoordNormalization eval dashes, eval punct
        self.filtered_out = not self.is_valid

    def normalize(self):
        """
        Build the latitude and longitude ordinates from the matched slots, derive the
        coordinate, then run validate().
        """
        GeocoordMatch.normalize(self)

        # DEG MIN (DM) patterns, with fractional min
        #
        # dmsDegLat > [-\s]? < dmsMinLat > < hemiLat > < fractMinLat > < latlonSep > < dmsDegLon > [-\s]? < dmsMinLon > < hemiLon > < fractMinLon
        #
        # degLat > < dmLatSep >\s? < minLat > < fractMinLat >? < msLatSep >?\s? < hemiLat > < latlonSep3 > < degLon > < dmLonSep >\s? < minLon > < fractMinLon
        #
        # < hemiLatPre >\s? < degLat > < dmLatSep >\s? < minLat > < fractMinLat >? < msLatSep >? < latlonSep3 >
        #     < hemiLonPre >\s? < degLon > < dmLonSep >\s? < minLon > < fractMinLon >? < msLonSep >?

        # TODO: conditions that invalidate this pattern?
        for axis in ("lat", "lon"):
            ordinate = DMSOrdinate(axis, self.text, self.pattern_family, slots=self.attributes())
            setattr(self, axis + "_ordinate", ordinate)
        self._make_coordinate()
        self.validate()

A general Pattern-based TextMatch. This Python variation consolidates PoliMatch (patterns-of-life = poli) ideas in the Java API.

def normalize(self):
568    def normalize(self):
569        GeocoordMatch.normalize(self)
570
571        # DEG MIN (DM) patterns, with fractional min
572        #
573        # dmsDegLat > [-\s]? < dmsMinLat > < hemiLat > < fractMinLat > < latlonSep > < dmsDegLon > [-\s]? < dmsMinLon > < hemiLon > < fractMinLon
574        #
575        # degLat > < dmLatSep >\s? < minLat > < fractMinLat >? < msLatSep >?\s? < hemiLat > < latlonSep3 > < degLon > < dmLonSep >\s? < minLon > < fractMinLon
576        #
577        # < hemiLatPre >\s? < degLat > < dmLatSep >\s? < minLat > < fractMinLat >? < msLatSep >? < latlonSep3 >
578        #     < hemiLonPre >\s? < degLon > < dmLonSep >\s? < minLon > < fractMinLon >? < msLonSep >?
579
580        # TODO: conditions that invalidate this pattern?
581        self.lat_ordinate = DMSOrdinate("lat", self.text, self.pattern_family, slots=self.attributes())
582        self.lon_ordinate = DMSOrdinate("lon", self.text, self.pattern_family, slots=self.attributes())
583        self._make_coordinate()
584        self.validate()

Optional, but recommended routine to normalize the matched data. That is, parse fields, uppercase, streamline punctuation, etc. As well, given such normalization result, this is the opportunity to additionally validate the match. :return:

class DegMinSecMatch(GeocoordMatch):
class DegMinSecMatch(GeocoordMatch):
    """
    Coordinate match for degree-minute-second (DMS) patterns; validation is delegated
    to the module-level ``dms_filter``.
    """

    def __init__(self, *args, **kwargs):
        GeocoordMatch.__init__(self, *args, **kwargs)
        self.filter = dms_filter

    def normalize(self):
        """
        Build the latitude and longitude ordinates from the matched slots, derive the
        coordinate, then run validate().
        """
        GeocoordMatch.normalize(self)
        for axis in ("lat", "lon"):
            ordinate = DMSOrdinate(axis, self.text, self.pattern_family, slots=self.attributes())
            setattr(self, axis + "_ordinate", ordinate)
        self._make_coordinate()
        self.validate()

    def validate(self):
        """
        Ask the configured filter whether this match should be dropped, storing its
        verdict in ``filtered_out`` and its reason in ``parsing_err``.
        """
        rejected, reason = self.filter.filter_out(self)
        self.filtered_out = rejected
        self.parsing_err = reason

A general Pattern-based TextMatch. This Python variation consolidates PoliMatch (patterns-of-life = poli) ideas in the Java API.

def normalize(self):
592    def normalize(self):
593        GeocoordMatch.normalize(self)
594        self.lat_ordinate = DMSOrdinate("lat", self.text, self.pattern_family, slots=self.attributes())
595        self.lon_ordinate = DMSOrdinate("lon", self.text, self.pattern_family, slots=self.attributes())
596        self._make_coordinate()
597        self.validate()

Optional, but recommended routine to normalize the matched data. That is, parse fields, uppercase, streamline punctuation, etc. As well, given such normalization result, this is the opportunity to additionally validate the match. :return:

class DecimalDegMatch(GeocoordMatch):
class DecimalDegMatch(GeocoordMatch):
    """
    Coordinate match for decimal-degree patterns.
    """

    def __init__(self, *args, **kwargs):
        GeocoordMatch.__init__(self, *args, **kwargs)

    def validate(self):
        """
        Decide whether a parsed decimal-degree coordinate is acceptable.

        A match that is already invalid is left untouched.  Otherwise it stays valid
        only when both hemispheres are alphabetic or either ordinate carries symbols,
        and the resolution filter also passes.  Examples:

             55.60, 80.11    -- not valid
             N55.60, W80.11  -- valid
             +55.60, -80.11  -- valid
             S20 E33         -- not valid, by default looking for sub-degree resolution.

        ``filtered_out`` is set to the negation of the resulting ``is_valid``.
        """
        if self.is_valid:
            lat_hemi = self.lat_ordinate.hemi
            lon_hemi = self.lon_ordinate.hemi
            both_alpha = lat_hemi and lon_hemi and lat_hemi.is_alpha() and lon_hemi.is_alpha()
            any_symbols = self.lat_ordinate.has_symbols() or self.lon_ordinate.has_symbols()
            self.is_valid = (both_alpha or any_symbols) and self.filter_by_resolution()

            self.filtered_out = not self.is_valid

    def normalize(self):
        """
        Build the latitude and longitude ordinates from the matched slots, derive the
        coordinate, then run validate().
        """
        GeocoordMatch.normalize(self)
        for axis in ("lat", "lon"):
            ordinate = DMSOrdinate(axis, self.text, self.pattern_family, slots=self.attributes())
            setattr(self, axis + "_ordinate", ordinate)
        self._make_coordinate()
        self.validate()

A general Pattern-based TextMatch. This Python variation consolidates PoliMatch (patterns-of-life = poli) ideas in the Java API.

def validate(self):
607    def validate(self):
608        """
609        Validate a parsed coordinate.
610             55.60, 80.11  -- not valid
611             N55.60, W80.11  -- valid
612             +55.60, -80.11  -- valid
613             S20 E33         -- not valid, by default looking for sub-degree resolution.
614
615        Validate also if the coordinate is a valid range for Lat/Lon.
616        """
617        if not self.is_valid:
618            return
619
620        lath = self.lat_ordinate.hemi
621        lonh = self.lon_ordinate.hemi
622        valid_hemi = lath and lonh and lath.is_alpha() and lonh.is_alpha()
623        valid_sym = self.lat_ordinate.has_symbols() or self.lon_ordinate.has_symbols()
624        self.is_valid = (valid_hemi or valid_sym) and self.filter_by_resolution()
625
626        self.filtered_out = not self.is_valid

Validate a parsed coordinate. Examples:

- 55.60, 80.11 -- not valid
- N55.60, W80.11 -- valid
- +55.60, -80.11 -- valid
- S20 E33 -- not valid; by default, sub-degree resolution is required.

Also validate that the coordinate falls within the valid range for Lat/Lon.

def normalize(self):
628    def normalize(self):
629        GeocoordMatch.normalize(self)
630        self.lat_ordinate = DMSOrdinate("lat", self.text, self.pattern_family, slots=self.attributes())
631        self.lon_ordinate = DMSOrdinate("lon", self.text, self.pattern_family, slots=self.attributes())
632        self._make_coordinate()
633        self.validate()

Optional, but recommended routine to normalize the matched data. That is, parse fields, uppercase, streamline punctuation, etc. As well, given such normalization result, this is the opportunity to additionally validate the match. :return: