petrichor on the breeze: regex

Showing posts with label regex. Show all posts

Wednesday, June 23, 2010

python challenge: room 3

python challenge: room 3

here's my solution:

import re

# Location of the puzzle text saved from the challenge page.
INPUT_PATH = 'c:\\python26\\MyProgs\\inputfile_room3.txt'

# One lowercase letter guarded by EXACTLY three capitals on each side.
# The boundaries are lookarounds rather than consuming [^A-Z] classes so that
#   * a candidate at the very start or end of the text is still found, and
#   * the right-hand guards are not consumed, so they can also serve as the
#     left-hand guards of the next candidate (e.g. "aXXXbXXXcXXXd").
p = re.compile(
    r'(?<![A-Z])[A-Z]{3}(?P<answer>[a-z])(?=[A-Z]{3}(?![A-Z]))')


def find_guarded_letters(text):
    """Return every lowercase letter in *text* flanked by exactly three
    uppercase letters on each side, in order of appearance."""
    return p.findall(text)


if __name__ == '__main__':
    # "with" closes the file even if read() raises.
    with open(INPUT_PATH, 'r') as infile:
        instring = infile.read()
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(find_guarded_letters(instring))


#solution is "linkedlist"

solution url:
http://www.pythonchallenge.com/pc/def/linkedlist.php

Tuesday, June 22, 2010

Thursday, March 11, 2010

awk: regular expressions and group submatch capture

# Ask the remote host which packages are installed in RPM group "pi".
b=$(ssh root@registration.authinfra.net 'rpm -qg pi')

# gawk's three-argument match() stores parenthesised sub-matches in array a;
# a[1] is the number that follows "pi-multihome-<digit>.<digit>-".
# "[.]" matches a literal dot (a bare "." would match any character).
# "$b" is quoted so the package list is passed through unmodified (no word
# splitting or globbing); awk then examines each package line separately.
echo "$b" | awk 'match($0, "pi-multihome-[[:digit:]][.][[:digit:]]-([[:digit:]]*)", a) {print a[1]}'

OUTPUT: 125324

Wednesday, May 27, 2009

c++: regular expression tester with boost

#include <iostream>
#include <string>
#include <boost/regex.hpp>  // point this to your Boost.Regex lib

using namespace std;

int main( ) 
{
    std::string s, sre;
    boost::regex re;

    while(true)
    {
        cout << "Expression: ";
        cin >> sre;
        if (sre == "quit")
        {
            break;
        }
        cout << "String:     ";
        cin >> s;

        try
        {
            // Set up the regular expression for case-insensitivity
            re.assign(sre, boost::regex_constants::icase);
        }
        catch (boost::regex_error& e)
        {
            cout << sre << " is not a valid regular expression: \""
            << e.what() << "\"" << endl;
            continue;
        }
        if (boost::regex_match(s, re))
        {
            cout << re << " matches " << s << endl;
        }
    }
}

Tuesday, May 26, 2009

c++: a simple boost::regex example

void piMozyAuthUnitTest::testBoostRegex1()
{
    std::string s, sre;
    boost::regex re;
    boost::cmatch matches;

    while(true)
    {
        cout << "Expression: ";
        cin >> sre;
        if (sre == "quit")
            break;
        

        cout << "String:     ";
        cin >> s;

        try
        {
            // Assignment and construction initialize the FSM used
            // for regexp parsing
            re = sre;
        }
        catch (boost::regex_error& e)
        {
            cout << sre << " is not a valid regular expression: \""
            << e.what() << "\"" << endl;
            continue;
        }
        // if (boost::regex_match(s.begin(), s.end(), re))
        if (boost::regex_match(s.c_str(), matches, re))
        {
            // matches[0] contains the original string.  matches[n]
            // contains a sub_match object for each matching
            // subexpression
            for (int i = 1; i < matches.size(); i++)
            {
                // sub_match::first and sub_match::second are iterators that
                // refer to the first and one past the last chars of the
                // matching subexpression
                string match(matches[i].first, matches[i].second);
                cout << "\tmatches[" << i << "] = " << match << endl;
            }
        }
        else
        {
            cout << "The regexp \"" << re << "\" does not match \"" << s << "\"" << endl;
        }
    }

}

Wednesday, April 16, 2008

perl: matching an IPv4 address

SOLUTION 1: Jeffrey Friedl's "Mastering Regular Expressions"
source

# One octet of a dotted quad: 0-199 (with optional leading zeros),
# 200-249, or 250-255.
my $ReIpNum = qr{([01]?\d\d?|2[0-4]\d|25[0-5])};
# Four octets separated by literal dots. \A and \z anchor to the true start
# and end of the string; "$" would also accept a trailing newline.
my $ReIpAddr = qr{\A$ReIpNum\.$ReIpNum\.$ReIpNum\.$ReIpNum\z};

# Test table: candidate string => 1 if it must match, 0 if it must not.
my %ips = ('0.0.0.0' => 1,
           '1.2.3.4' => 1,
           '255.255.255.255' => 1,
           '000.34.2000.2' => 0,
           '' => 0,
           '24.23.23.' => 0);

for my $ip (keys %ips) {
    # A failed match yields the empty string, so normalise the result to
    # 1/0 before comparing it numerically with the expectation.
    my $matched = ($ip =~ m{$ReIpAddr}) ? 1 : 0;
    die "Failed: $ip"
        unless $matched == $ips{$ip};
    print "$ip passed\n";
}

SOLUTION 2: USE Regexp::Common
source

#!/bin/perl
use Regexp::Common;

# Read the sample lines that follow __DATA__ and report each one in which
# Regexp::Common's decimal IPv4 pattern finds an address.
while (<DATA>) {
    # With -keep the pattern captures; $1 is printed as the address found.
    if (/$RE{net}{IPv4}{dec}{-keep}/) {
        print "IP Address: $1\n";
    }
}

__DATA__
24.113.50.245
0.42.523.2
255.242.52.4
2.5.3

Discussion:

IP addresses are difficult to match using a simple regular expression, because the expression must also verify that each part of the address is in range. A simple expression such as /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/ will incorrectly match strings such as 789.23.2.900, which lies outside the range of valid IP addresses (i.e., 0.0.0.0 to 255.255.255.255). Damian Conway's Regexp::Common module provides a very effective regular expression which matches only valid IP addresses.

Thursday, March 27, 2008

perl: extracting email addresses from a thunderbird message list

The inputfile resides somewhere like: "C:\Documents and Settings\USERNAME\Application Data\Thunderbird\Profiles\sbdq6f9n.default\Mail\Local Folders\Inbox.sbd\guitar.sbd"

use strict;
use warnings;
use Tie::File;
use Fcntl;


# Comparator for sort: orders strings case-insensitively.
sub alphabetically { lc($a) cmp lc($b) }


# The mail folder file to scan is the only command-line argument.
my $inputfile = shift;

unless ($inputfile)
{
    print "\nUSAGE:\n$0 <inputfile>\n\n\n\n";
    exit 0;
}

# Tie the file to an array, read-only, so it can be walked line by line
# without loading it into memory all at once.
my @contents;

tie (@contents, 'Tie::File', $inputfile, mode=>O_RDONLY) or die "Can't open $inputfile: $!\n\n\n\n";


# Keys are the addresses found; a hash gives a unique list automatically.
my %seen = ();

foreach my $line (@contents) {
    next unless $line =~ m/([a-zA-Z]+\.[a-zA-Z]+\@calsoftinc\.com)/;

    # Copy the capture before running another match, so the value does not
    # depend on how the next regex treats $1.
    my $address = $1;

    # Skip any address containing "ambar".
    next if $address =~ m/ambar/;

    $seen{$address}++;
}

untie @contents;

# One address per line, sorted case-insensitively.
print sort alphabetically map { "$_\n" } keys %seen;