Effective_Python mapreduce
<1>完全吊炸天构造器的写法。。。
import os
import threading,time
class GenericInputData(object):
    """Abstract description of one piece of input for a map/reduce job."""

    def read(self):
        """Return the data of this input. Subclasses must override."""
        raise NotImplementedError

    @classmethod
    def generate_inputs(cls, config):
        """Return an iterable of input objects built from *config*.

        Subclasses must override.
        """
        raise NotImplementedError


class PathInputData(GenericInputData):
    """Input backed by a single file on disk."""

    def __init__(self, path):
        super(PathInputData, self).__init__()
        self.path = path

    def read(self):
        """Return the complete content of the file at ``self.path``."""
        # The context manager closes the handle even if read() raises;
        # the original left the file open until garbage collection.
        with open(self.path) as handle:
            return handle.read()

    def get_path_name(self):
        """Return the path this input reads from."""
        return self.path

    @classmethod
    def generate_inputs(cls, config):
        """Yield one instance per regular file in ``config['data_dir']``.

        Acts as a generic alternate constructor: ``cls(...)`` calls
        ``__init__`` of whichever subclass the method was invoked on.
        """
        data_dir = config['data_dir']
        for name in os.listdir(data_dir):
            full_path = os.path.join(data_dir, name)
            # Sub-directories cannot be read as a file and would make the
            # worker thread fail, so only regular files are yielded.
            if os.path.isfile(full_path):
                yield cls(full_path)


class GenerateWorker(object):
    """Abstract worker: ``map`` fills ``result``, ``reduce`` merges another worker."""

    def __init__(self, input_data):
        self.input_data = input_data
        self.result = None

    def map(self):
        """Compute ``self.result`` from ``self.input_data``. Subclasses must override."""
        raise NotImplementedError

    def reduce(self, other):
        """Fold ``other.result`` into ``self.result``. Subclasses must override."""
        raise NotImplementedError

    @classmethod
    def create_workers(cls, input_class, config):
        """Return a list with one worker per input produced by *input_class*."""
        return [cls(input_data)
                for input_data in input_class.generate_inputs(config)]


class LineCountWorker(GenerateWorker):
    """Worker that counts newline characters in its input."""

    def __init__(self, input_data):
        # Only delegates; without it the parent __init__ would be inherited.
        super(LineCountWorker, self).__init__(input_data)

    def map(self):
        data = self.input_data.read()
        self.result = data.count("\n")

    def reduce(self, other):
        self.result += other.result

    def get_worker_name(self):
        """Return the path name of the wrapped input."""
        return self.input_data.get_path_name()


class Thread_Excute_workers(threading.Thread):
    """Thread that runs ``worker.map()`` and then reports which input it handled."""

    def __init__(self, threadId, worker):
        super(Thread_Excute_workers, self).__init__()
        self.worker = worker
        self.th_id = threadId

    def run(self):
        self.worker.map()
        print("Thread ID " + str(self.th_id) + " run "
              + self.worker.get_worker_name() + '\n')


def excute(workers):
    """Run ``map`` of every worker in its own thread, then reduce the results.

    Returns the reduced result held by the first worker, or ``None`` when
    *workers* is empty (the original raised ``IndexError`` in that case).
    """
    if not workers:
        return None

    threads = []
    for thread_id, worker in enumerate(workers):
        th = Thread_Excute_workers(thread_id, worker)
        th.start()
        threads.append(th)
    # Every map step must be finished before the results are combined.
    for th in threads:
        th.join()

    first, rest = workers[0], workers[1:]
    for other in rest:
        first.reduce(other)
    return first.result


def mapreduce(worker_class, input_class, config):
    """Create the workers for *config* and return the reduced result."""
    workers = worker_class.create_workers(input_class, config)
    return excute(workers)


# Guarded so that importing the classes does not start a job against a
# hard-coded directory; running the file as a script behaves as before.
if __name__ == "__main__":
    config = {'data_dir': "C:\\data_dir"}
    result = mapreduce(LineCountWorker, PathInputData, config)
    print(result)
<2> Create md5
import hashlib


def getMd5(data):
    """Return the hexadecimal MD5 digest of ``str(data)``.

    Any object is accepted; it is converted with ``str()`` first, so equal
    string representations always give equal digests.
    """
    text = str(data)
    # hashlib only accepts bytes. On Python 2 str() already yields bytes;
    # on Python 3 it yields text that has to be encoded first.
    if not isinstance(text, bytes):
        text = text.encode("utf-8")
    return hashlib.md5(text).hexdigest()


if __name__ == "__main__":
    a = getMd5(1)
    b = getMd5(1)
    print(a == b)
    print(a)
<3> 单元测试
def just_do_it(text):
    """Return *text* with the first character upper-cased and the rest lower-cased."""
    capitalized = text.capitalize()
    return capitalized
cap.py
import cap
import unittest
class TestCap(unittest.TestCase):
    """Unit tests for ``cap.just_do_it``."""

    def test_one_word(self):
        text = 'duck'
        result = cap.just_do_it(text)
        self.assertEqual(result, 'Duck')

    def test_length(self):
        test = 'duck'
        result = len(cap.just_do_it(test))
        self.assertEqual(result, 4)


if __name__ == '__main__':
    log_file = "log_file.txt"
    # The with-block closes the log even if the test runner raises; the
    # original only closed it on the success path.
    with open(log_file, "w") as f:
        runner = unittest.TextTestRunner(stream=f, verbosity=2)
        # exit=False keeps unittest from calling sys.exit() so the file is
        # closed normally afterwards.
        unittest.main(exit=False, testRunner=runner)
unittest_cap
如果不想放在文件里,直接unittest.main()
<4>Bytes 字节
(1)转换字节:

(2) 读取png 文件的width ,height
import struct
import pprint
import binascii
# The 8-byte signature every PNG file starts with.
pngHeader = b'\x89PNG\r\n\x1a\n'

# Binary mode is required: in text mode Windows translates '\r\n' and treats
# '\x1a' as end-of-file, both of which occur in the PNG signature.
# The with-block also closes the file, which the original never did.
with open('test.png', mode='rb') as f:
    pngByte = f.read(30)

if pngByte[:8] == pngHeader:
    print('this is a png file')
    # '>'  : big-endian
    # 'L'  : 4-byte unsigned long, so 'LL' consumes 8 bytes
    # width occupies bytes 16-19 of the stream, height bytes 20-23
    width, height = struct.unpack('>LL', pngByte[16:24])
    print('width height: %s %s' % (width, height))

    # NOTE(review): the original indentation was lost; these two prints are
    # assumed to belong to the PNG branch - confirm.
    print('width bytes is  %s' % (struct.unpack('>L', pngByte[16:20]),))
    print('height bytes is  %s' % (pngByte[20:24],))
(3)一些类的概念:
"""
# <1>PROPERTY do not hidden_name member , but can use name=PROPERTY(get,set)
class duck():
def __init__(self,input_name):
self.hidden_name = input_name
def get_name(self):
print 'inside getter'
return self.hidden_name
def set_name(self,input_name):
self.hidden_name = input_name
name = property(get_name,set_name) if __name__ == "__main__":
d = duck('tttttt')
d.hidden_name = 'ttt' # Very stupid , not hidden
#d.set_name() # this can be called ok
print d.name
""" """
# <2>use @property and setter method
class duck():
def __init__(self,input_name):
self.hidden_name = input_name #GETTER METHOD
@property
def name(self):
print 'inside the getter'
return self.hidden_name #SETTER METHOD
@name.setter
def name(self,input_name):
print 'inside the getter'
self.hidden_name = input_name d = duck('houdini')
d.name = "test"
print d.name
""" """
#<3> @property connect the self.member
class Circle():
def __init__(self,radius):
self.radius = radius @property
def diameter(self):
return self.radius*2 c = Circle(2)
print c.radius # 2
print c.diameter # 4
c.radius = 7
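print c.diameter # 14
c.diameter = 1000 # No diameter.setter is defined, so this assignment should fail. In py2.7 it succeeds only because Circle() is an old-style class: the property is bypassed and an instance attribute named diameter is created.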
print c.diameter # 1000
""" """
#<4> hidden ?
class Duck():
def __init__(self,input_name):
self.__name = input_name @property
def name(self):
print 'getter method'
return self.__name
@name.setter
def name(self,input_name):
print 'setter method'
self.__name = input_name d = Duck('Maya')
print d.name
d.name = 'Houdini'
print d.name
#print d.__name #ERROR
#print d._Duck__name #Get hidden member,But Result is Maya....
""" """
#<5> @classmethod,class member
class A():
count = 0 #Same as C++ static member
def __init__(self):
A.count += 1 #Same as C++ static member @classmethod
def kids(cls):
print " A has childs num is " ,cls.count
a1=A()
a2=A()
a3=A()
A.kids()
""" #<6> Magic method
class Word(object):
    """Small value wrapper demonstrating operator ("magic") methods.

    Operands only need a ``text`` attribute; anything else makes the
    operator return ``NotImplemented`` so Python can fall back to its
    default handling instead of failing with ``AttributeError``.
    """

    def __init__(self, text):
        self.text = text

    def __eq__(self, other):
        try:
            other_text = other.text
        except AttributeError:
            return NotImplemented
        return self.text == other_text

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so it is spelled out.
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    def __add__(self, other):
        try:
            other_text = other.text
        except AttributeError:
            return NotImplemented
        return Word(self.text + other_text)

    def __sub__(self, other):
        try:
            other_text = other.text
        except AttributeError:
            return NotImplemented
        return Word(self.text - other_text)


a = Word(1)
b = Word(2)
print(a == b)  # False
c = a + b
print(c.text)  # 3
print(isinstance(c, int))  # False
print(isinstance(c, float))  # False
print(isinstance(c, Word))  # True
(4) 深入函数参数:*arg,**kwargs,指向函数的参数
# coding=utf-8
'''
Created by yangping liu on 2017-05-19.
Copyright (c) 2018 YiAnimation.All rights reserved.
''' #<1>
#参数*arg,其实进去就作为元组
def test_turple(*arg): #arg as tuple
print arg
for x in arg:
print '*arg index value is ' ,x
#参数**kwargs,进去就作为字典
def test_dictArguments(farg, **kwargs): #kwargs as dict
print "farg:", farg
for key in kwargs:
print "another keyword arg: %s: %s" % (key, kwargs[key]) test_turple(1,2,3,4,5)
test_dictArguments(farg=1, myarg2="two", myarg3=3) #<2>
#定义一个add函数,接受*args,其实*args是有顺序的元组
def add(*args):
return sum(args) #定义一个callback,用来指向函数,而args是callback函数的参数
def testAddpointer(callback,*args):
if(len(args)) == 0 : #non arg function
return callback()
return callback(*args) print testAddpointer(add)
print testAddpointer(add,1,2,3)
print testAddpointer(add,1,2,4,5,6,7,8,9) #定义一个只有2个参数,其实对于+法功能其实很垃圾,毕竟不能1+2+3+4...
def addBad(x=0,y=1):
return x+y
print testAddpointer(addBad) # 1 我们的函数参数 函数指针依然适合
print testAddpointer(addBad,1,2) # 3 函数参数 函数指针依然适合 #<3>
print("\ndict to function arguments samples")


# An addition with exactly three parameters this time
def add2(arg1, arg2, arg3):
    """Return ``arg1 + arg2 + arg3``."""
    partial = arg1 + arg2
    return partial + arg3


kwargs = dict(arg2=2, arg3=3)
# 1 is bound to arg1; **kwargs supplies arg2=2 and arg3=3
print(add2(1, **kwargs))  # result: 6

# <4>
print '\nfunction pointer to a function,args is **kwargs'
def add3(master,senior):
return master+senior
#我们函数参数这次带的是**kwargs
def testAddPointerDict(callback,**kwargs):
return add3(**kwargs) #注意传入方法
dictFunctionArg = {'master':1,'senior':2}
print testAddPointerDict(add3,**dictFunctionArg) # ok
print testAddPointerDict(add3,master=1,senior=2) # ok
print testAddPointerDict(add3,senior=2,master=1) # ok
(5)修饰器:
def document_it(func):
    """Decorator that prints the name, arguments and result of every call.

    Returns a wrapper with the same call signature as *func*; the wrapper
    returns the value *func* returned.
    """
    import functools  # local import keeps this snippet self-contained

    # wraps() copies __name__/__doc__ so the decorated function still
    # identifies itself as the original one.
    @functools.wraps(func)
    def new_function(*args, **kwargs):
        print("running function :  %s" % (func.__name__,))
        print('position arguments :  %s' % (args,))
        print('keyword arguments :  %s' % (kwargs,))
        result = func(*args, **kwargs)
        # The original `print('Result :',result)` printed a tuple on
        # Python 2; format it like the lines above instead.
        print('Result : %s' % (result,))
        return result
    return new_function


def add_ints(a, b):
    """Return ``a + b``."""
    return a + b


cooler_add_ints = document_it(add_ints)  # apply the decorator by hand
print(cooler_add_ints(a=1, b=2))
print(cooler_add_ints(b=3, a=2))
print(cooler_add_ints(1, 2))


# apply the decorator directly with the @ syntax
@document_it
def add_ints2(a, b):
    """Return ``a + b``."""
    return a + b
(6)如何制作更加成熟的callBack
class callback(object):
    """Callable that remembers a function together with its arguments.

    Arguments passed when the object is *called* are deliberately ignored;
    only the arguments given to the constructor are forwarded.
    """

    def __init__(self, func, *args, **kwargs):
        self.func = func
        self.args = args
        self.kwargs = kwargs

    def __call__(self, *args):
        """Invoke the stored function; return its result, or None if it raised."""
        try:
            return self.func(*self.args, **self.kwargs)
        except Exception:
            # A bare `except:` also swallowed KeyboardInterrupt/SystemExit
            # and hid the reason of the failure. Keep the best-effort
            # `None` result but record what went wrong.
            import logging
            logging.getLogger(__name__).exception(
                "callback %r failed", self.func)
            return None


# define add_simple
def add_simple(x, y):
    """Return ``x + y``."""
    return x + y


add_simpleCallBack = callback(add_simple, 10, 15)


# define print_simple
def print_simple(var='Error Code'):
    """Print *var*."""
    print(var)


print_simpleCallBack = callback(print_simple)

if add_simpleCallBack:
    print('callback add object object: %s' % (add_simpleCallBack(),))
if print_simpleCallBack:
    print('callback print simple: %s' % (print_simpleCallBack(),))
(7)一些特殊方法测试:
class Description:
    """List-backed container driven through operators.

    ``+`` appends an item and ``-`` removes one; both change the object in
    place and hand back the same object.
    """

    def __init__(self):
        self.data = []

    def __add__(self, other):
        items = self.data
        items.append(other)
        return self

    def __sub__(self, other):
        items = self.data
        items.remove(other)
        return self

    def __str__(self):
        text = str(self.data)
        return text

    def __len__(self):
        count = len(self.data)
        return count

    def __getitem__(self, item):
        items = self.data
        return items[item]


if __name__ == "__main__":
    e = Description()
    for addition in (1, 2, 3):
        e += addition
    e -= 3
    for addition in ('houdini', 'maya'):
        e += addition
    print('length is  %s  Data is : %s' % (len(e), e))
    print(e[0])
(8)打包
C:\Python27\Scripts\pyinstaller.exe -F -w --name MusterRendering --icon=icon.ico sqlite_muster.py


<>元类:
"Type" create a class:
class ObjectCreator(object):
    """Empty class used to show that classes themselves are objects."""


def echo(cls):
    """Print the given object."""
    print(cls)


def unit_test_part1():
    """Show that classes can be printed, extended, aliased and built with type()."""
    instance = ObjectCreator()
    echo(instance)
    echo(ObjectCreator)
    echo(hasattr(ObjectCreator, "new_attribute"))

    # add new_attribute='foo' to the class itself
    setattr(ObjectCreator, 'new_attribute', 'foo')
    echo(hasattr(ObjectCreator, "new_attribute"))  # True
    echo(ObjectCreator.new_attribute)  # foo

    # a class can be bound to another variable
    alias = ObjectCreator
    echo(hasattr(alias, "new_attribute"))  # True
    echo(alias.new_attribute)  # foo

    # type(name, bases, attributes) creates a class
    base_cls = type('foo', (), {'bar': True})  # creates a class named 'foo'
    echo(base_cls.__class__)  # type 'type'
    echo(type(base_cls))  # type 'type'
    echo(base_cls)  # <class '__main__.foo'>
    echo(base_cls.bar)  # True

    child_cls = type('fooChild', (base_cls,), {'ok': False})
    echo(child_cls)
    echo(child_cls.bar)  # True, inherited from foo
    echo(child_cls.ok)
type.__class__ ,type.__class__.__class__
# unit_test part2
def unit_test_part2():
    """Echo the class of a function, a string and an int, then the class of int."""
    def foo():
        pass

    # expected on Python 2: type 'function', type 'str', type 'int'
    for sample in (foo, 'houdini', 2):
        echo(sample.__class__)
    # the class of a class is its metaclass: type 'type'
    int_class = (2).__class__
    echo(int_class.__class__)
Convert the class attribute names to upper case:
def upper_attr(future_class_name, future_class_parents, future_class_attr):
    """Build a class whose non-dunder attribute names are upper-cased.

    Attributes whose name starts with ``__`` are dropped, every other name
    is upper-cased, and a lower-case ``json`` attribute is added.
    Returns the class created by ``type()``.
    """
    print("%s | %s | %s" % (future_class_name,
                            future_class_parents,
                            future_class_attr))
    att = {
        name.upper(): value
        for name, value in future_class_attr.items()
        if not name.startswith('__')
    }
    att['json'] = "json"  # add one lower-case attribute to the class
    return type(future_class_name, future_class_parents, att)


# __metaclass__ = upper_attr
class Foo(object):
    bar = 'pip'
    hou = 'houdini'
    # Python 2 hook: the class body is handed to upper_attr to build Foo.
    __metaclass__ = upper_attr


if __name__ == "__main__":
    print(Foo.BAR)
    print(Foo.HOU)
    print(Foo.json)
OOP metaClass
Generate some attributes for a class:
class MetaClass(type):
    """Metaclass that gives every class ``houdini``, ``maya`` and ``nuke`` attributes."""

    def __new__(cls, name, baseClass, dict):
        """Create the class *name*, adding the default attributes.

        ``dict`` is the namespace collected from the class body. The
        original replaced it with a fresh dictionary, which threw away
        ``__module__`` and every attribute the class defined itself; the
        namespace is now kept and the defaults are only filled in where the
        class did not set them.
        """
        print('===================')
        print(cls)
        print(name)
        print(baseClass)
        print(dict)
        print('===================')

        attrib = {}
        attrib.update(dict)
        for tool in ('houdini', 'maya', 'nuke'):
            attrib.setdefault(tool, '')
        return super(MetaClass, cls).__new__(cls, name, baseClass, attrib)


class Foo2(object):
    # Python 2 hook: Foo2 is created through MetaClass.
    __metaclass__ = MetaClass


if __name__ == "__main__":
    print(Foo2.houdini)
    print(Foo2.maya)
    print(Foo2.nuke)
结果:
===================
<class '__main__.MetaClass'>
Foo2
(<type 'object'>,)
{'__module__': '__main__', '__metaclass__': <class '__main__.MetaClass'>}
===================
1
2
3
PyQt5 对应python2.7
pip install python-qt5
..
Effective_Python mapreduce的更多相关文章
- Mapreduce的文件和hbase共同输入
Mapreduce的文件和hbase共同输入 package duogemap; import java.io.IOException; import org.apache.hadoop.co ...
- mapreduce多文件输出的两方法
mapreduce多文件输出的两方法 package duogemap; import java.io.IOException; import org.apache.hadoop.conf ...
- mapreduce中一个map多个输入路径
package duogemap; import java.io.IOException; import java.util.ArrayList; import java.util.List; imp ...
- Hadoop 中利用 mapreduce 读写 mysql 数据
Hadoop 中利用 mapreduce 读写 mysql 数据 有时候我们在项目中会遇到输入结果集很大,但是输出结果很小,比如一些 pv.uv 数据,然后为了实时查询的需求,或者一些 OLAP ...
- [Hadoop in Action] 第5章 高阶MapReduce
链接多个MapReduce作业 执行多个数据集的联结 生成Bloom filter 1.链接MapReduce作业 [顺序链接MapReduce作业] mapreduce-1 | mapr ...
- MapReduce
2016-12-21 16:53:49 mapred-default.xml mapreduce.input.fileinputformat.split.minsize 0 The minimum ...
- 使用mapreduce计算环比的实例
最近做了一个小的mapreduce程序,主要目的是计算环比值最高的前5名,本来打算使用spark计算,可是本人目前spark还只是简单看了下,因此就先改用mapreduce计算了,今天和大家分享下这个 ...
- MapReduce剖析笔记之八: Map输出数据的处理类MapOutputBuffer分析
在上一节我们分析了Child子进程启动,处理Map.Reduce任务的主要过程,但对于一些细节没有分析,这一节主要对MapOutputBuffer这个关键类进行分析. MapOutputBuffer顾 ...
- MapReduce剖析笔记之七:Child子进程处理Map和Reduce任务的主要流程
在上一节我们分析了TaskTracker如何对JobTracker分配过来的任务进行初始化,并创建各类JVM启动所需的信息,最终创建JVM的整个过程,本节我们继续来看,JVM启动后,执行的是Child ...
随机推荐
- HTML5初学篇章_3
表单的标签是<form>,它使页面与客户的互动成为可能.而它的大部分元素字自HTML2.0后就没有再改变过,由此可见这是一个多么具有卓越性的设计. <form>标签是用于创建供 ...
- C++STL -- vector 使用
vector是一种顺序容器. vector常用API: 现在一个个分析: 1. assign 这是一种赋值方法,但是会覆盖原来容器内的值. void assign( size_type num, co ...
- Java NIO 网络编程基础
Java NIO提供了一套网络api,可以用来处理连接数很多的情况.他的基本思想就是用一个线程来处理多个channel. 123456789101112131415161718192021222324 ...
- mvn打包idea项目
首先 通过cmd进入docs 然后用cd命令进入项目文件夹所在路径 然后输入mvn -Dmaven.test.skip=true package//-Dmaven.test.skip=true跳过测试
- Tomcat性能调优
1.集成apache 虽然Tomcat也可以作web服务器,但是处理静态html的速度比不上apache,且其作为web服务器的功能远不如Apache,因此把apache和tomcat集成起来,讲ht ...
- 关于Currency类型和 TCurrencyFiled的悲剧
这2天程序出问题, 用户结算金额经常莫名其妙的多出了小数点后几位, 不用思考 肯定是因为浮点精度不准确的问题 查了一下, 程序中的数据类型使用的是Currency, 按照数据类型的描述, 这个金额类型 ...
- Linux内核设计第二周——操作系统工作原理
Linux内核设计第二周 ——操作系统工作原理 作者:宋宸宁(20135315) 一.实验过程 图1 执行效果 从图中可以看出,每执行my_ start_ kernel函数两次或一次,my_ time ...
- 【测试分析】HTSM模型
◆版权声明:本文出自胖喵~的博客,转载必须注明出处. 转载请注明出处:http://www.cnblogs.com/by-dream/p/5508428.html 概述 HTSM全称Heuristic ...
- [LeetCode]题解(python):120 Triangle
题目来源 https://leetcode.com/problems/triangle/ Given a triangle, find the minimum path sum from top to ...
- centos7 systemctl命令
systemctl命令是系统服务管理器指令,它实际上将 service 和 chkconfig 这两个命令组合到一起. 实例: 启动nfs服务:systemctl start nfs-server.s ...