python

December 18, 2012
 
#!/usr/bin/env python
#-*- coding:utf-8 -*-

from __future__ import division
import urllib
import csv
from string import punctuation

# data files
files = ['negative.txt', 'positive.txt', 'obama_tweets.txt']

# base URL for the data files
path = 'http://www.unc.edu/~ncaren/haphazard/'
for file_name in files:
    # download each data file
    urllib.urlretrieve(path + file_name, file_name)

# read the tweets file
tweets = open("obama_tweets.txt").read()
tweets_list = tweets.split('\n')

pos_sent = open("positive.txt").read()
positive_words = pos_sent.split('\n')
positive_counts = []

neg_sent = open('negative.txt').read()
negative_words = neg_sent.split('\n')
negative_counts = []

# loop over the tweets
for tweet in tweets_list:
    positive_counter = 0
    negative_counter = 0
    # lowercase the text
    tweet_processed = tweet.lower()

    # strip punctuation
    for p in list(punctuation):
        tweet_processed = tweet_processed.replace(p, '')

    # split into words
    words = tweet_processed.split(' ')
    word_count = len(words)
    # count positive and negative words
    for word in words:
        if word in positive_words:
            positive_counter = positive_counter + 1
        elif word in negative_words:
            negative_counter = negative_counter + 1

    positive_counts.append(positive_counter / word_count)
    negative_counts.append(negative_counter / word_count)

print len(positive_counts)

output = zip(tweets_list, positive_counts, negative_counts)

writer = csv.writer(open('tweet_sentiment.csv', 'wb'))
writer.writerows(output)
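For a quick look at the result, the rows in tweet_sentiment.csv can be re-read and ranked by the positive ratio; a minimal sketch, not part of the original script:

import csv

rows = list(csv.reader(open('tweet_sentiment.csv', 'rb')))
# sort by the positive ratio (second column) and show the five most positive tweets
rows.sort(key=lambda r: float(r[1]), reverse=True)
for tweet, pos, neg in rows[:5]:
    print pos, neg, tweet[:60]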


 Posted at 10:01 AM
May 31, 2012
 

Predicting product page views and product sales

The baseline method

#!/usr/bin/env python
#-*- coding:utf-8 -*-

import sys
from math import log, sqrt

def str2list(strDateNum, separator):
    """Parse a 'date:count' string into two parallel lists sorted by date."""
    dDateNum = {}
    date = []
    view = []
    for i in strDateNum.split(separator):
        d, v = i.split(':')
        dDateNum.setdefault(int(d), int(v))

    l = sorted([(k, v) for k, v in dDateNum.iteritems()])

    for item in l:
        date.append(item[0])
        view.append(item[1])

    return date, view

def calc_month_trend(strDateNum):
    """Month trend: views in the last 15 days minus views in the previous 15 days."""
    date, view = str2list(strDateNum, ',')
    trend2 = sum(view[-15 : ])
    trend1 = 1.0 * sum(view[-30 : -15])
    month_trend = trend2 - trend1

    date_str = '[%s]' % ','.join(map(str, date))
    view_str = '[%s]' % ','.join(map(str, view))

    return month_trend, date_str, view_str

def calc_daily_trend(strDateNum, Total):
    """Daily trend: week-over-week slope, damped by the log of last week's volume."""
    date, view = str2list(strDateNum, ',')
    y2 = view[-1]
    y1 = view[-8]
    weekly_view = sum(view[-8 : -1])
    slope = y2 - y1
    trend = round(slope * (1.0 + log(1.0 + int(weekly_view))))
    error = round(1.0 / sqrt(int(Total)), 2)
    return trend, error

if __name__ == '__main__':
   
    #main()
    strList = '2:3,1:4,5:4,34:3,4:56,33:4,45:23,23:5,12:1,\
    13:2,14:3,9:0,10:3,100:1,101:2,103:3,104:4,105:5,106:6,\
    107:7,108:8,109:9,110:10,111:12,\
    112:12,113:13,114:1,115:34,116:16,117:17'
    a1, a2 = str2list(strList, ',')
    print a1
    print a2
    month_trend, date_str, view_str  = calc_month_trend(strList)
    print month_trend
    print date_str
    print view_str
 
    daily_trend, error = calc_daily_trend(strList, 50)
    print daily_trend
    print error

 

Test results:

[1, 2, 4, 5, 9, 10, 12, 13, 14, 23, 33, 34, 45, 100, 101, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117]
[4, 3, 56, 4, 0, 3, 1, 2, 3, 5, 4, 3, 23, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 12, 13, 1, 34, 16, 17]
43.0
[1,2,4,5,9,10,12,13,14,23,33,34,45,100,101,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117]
[4,3,56,4,0,3,1,2,3,5,4,3,23,1,2,3,4,5,6,7,8,9,10,12,12,13,1,34,16,17]
39.0
0.14
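A quick hand check of these numbers, replaying the same formulas on the sorted view list printed above (a verification sketch only):

from math import log, sqrt

view = [4,3,56,4,0,3,1,2,3,5,4,3,23,1,2,3,4,5,6,7,8,9,10,12,12,13,1,34,16,17]

print sum(view[-15:]) - 1.0 * sum(view[-30:-15])                           # 43.0, the month trend
print round((view[-1] - view[-8]) * (1.0 + log(1.0 + sum(view[-8:-1]))))   # 39.0, the daily trend
print round(1.0 / sqrt(50), 2)                                             # 0.14, the error term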

PS:

 

This can be checked against real data; here it is only a simple test.

A basic trend-forecasting approach: the baseline method.

In practice, an exponential moving average currently gives the smallest error (one small trick: weekend data needs to be handled separately).

For large data volumes, processing with Python + Hadoop Streaming is very fast (a minimal mapper/reducer sketch follows these notes).

Exponential moving average: http://en.wikipedia.org/wiki/Moving_average

This is used for business-team KPI forecasting, e.g. transaction volume, sales, and so on.
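The Hadoop Streaming pattern mentioned above is just a mapper and a reducer reading stdin; a minimal sketch that sums daily views per item (the comma-separated input layout here is my assumption, not from the original post):

# mapper.py: emit "item_id<TAB>views" for each line (assumed input: item_id,date,views)
import sys

for line in sys.stdin:
    item_id, date, views = line.strip().split(',')
    print '%s\t%s' % (item_id, views)

# reducer.py: Hadoop delivers the mapper output sorted by key, so sum views per item_id
import sys

current, total = None, 0
for line in sys.stdin:
    key, value = line.strip().split('\t')
    if key != current:
        if current is not None:
            print '%s\t%d' % (current, total)
        current, total = key, 0
    total += int(value)
if current is not None:
    print '%s\t%d' % (current, total)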

 

Exponential moving average results

Day 01 value=143.00 2-bar EMA=143.000000 and SMA=143.000000
Day 02 value=161.00 2-bar EMA=152.000000 and SMA=152.000000
Day 03 value=328.00 2-bar EMA=310.400000 and SMA=244.500000
Day 04 value=198.00 2-bar EMA=209.240000 and SMA=263.000000
Day 05 value=166.00 2-bar EMA=170.324000 and SMA=182.000000
Day 06 value=130.00 2-bar EMA=134.032400 and SMA=148.000000
Day 07 value=125.00 2-bar EMA=125.903240 and SMA=127.500000
Day 08 value=128.00 2-bar EMA=127.790324 and SMA=126.500000
Day 09 value=133.00 2-bar EMA=132.479032 and SMA=130.500000
Day 10 value=162.00 2-bar EMA=159.047903 and SMA=147.500000
Day 11 value=124.00 2-bar EMA=127.504790 and SMA=143.000000
Day 12 value=131.00 2-bar EMA=130.650479 and SMA=127.500000
Day 13 value=132.00 2-bar EMA=131.865048 and SMA=131.500000
Day 14 value=125.00 2-bar EMA=125.686505 and SMA=128.500000
Day 15 value=109.00 2-bar EMA=110.668650 and SMA=117.000000
Day 16 value=151.00 2-bar EMA=146.966865 and SMA=130.000000
Day 17 value=111.00 2-bar EMA=114.596687 and SMA=131.000000
Day 18 value=103.00 2-bar EMA=104.159669 and SMA=107.000000
Day 19 value=170.00 2-bar EMA=163.415967 and SMA=136.500000
Day 20 value=117.00 2-bar EMA=121.641597 and SMA=143.500000
Day 21 value=93.00 2-bar EMA=95.864160 and SMA=105.000000
Day 22 value=88.00 2-bar EMA=88.786416 and SMA=90.500000
Day 23 value=82.00 2-bar EMA=82.678642 and SMA=85.000000
Day 24 value=100.00 2-bar EMA=98.267864 and SMA=91.000000
Day 25 value=71.00 2-bar EMA=73.726786 and SMA=85.500000
Day 26 value=80.00 2-bar EMA=79.372679 and SMA=75.500000
Day 27 value=95.00 2-bar EMA=93.437268 and SMA=87.500000
Day 28 value=122.00 2-bar EMA=119.143727 and SMA=108.500000
Day 29 value=67.00 2-bar EMA=72.214373 and SMA=94.500000
Day 30 value=78.00 2-bar EMA=77.421437 and SMA=72.500000
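The 2-bar EMA and SMA figures above are consistent with a smoothing factor of 0.9 and an EMA seeded with the two-day simple average; a minimal sketch that reproduces the first rows under those assumptions:

values = [143, 161, 328, 198, 166, 130, 125]   # the first few days from the table above

def two_bar_sma(v):
    # simple average of the current and previous value
    return [float(v[0])] + [(a + b) / 2.0 for a, b in zip(v, v[1:])]

def two_bar_ema(v, alpha=0.9):
    # seed with the 2-day simple average, then smooth: ema = alpha * value + (1 - alpha) * prev_ema
    out = [float(v[0]), (v[0] + v[1]) / 2.0]
    for x in v[2:]:
        out.append(alpha * x + (1 - alpha) * out[-1])
    return out

for day, (x, e, s) in enumerate(zip(values, two_bar_ema(values), two_bar_sma(values)), 1):
    print 'Day %02d value=%.2f 2-bar EMA=%f and SMA=%f' % (day, x, e, s)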

This can be extended into a data product for information disclosure.



 Posted at 9:20 AM
December 1, 2011
 
Thanks to Tricia's introduction, I recently realized that Google Analytics is a powerful tool for web analytics and business intelligence. It would fit some special needs if we could use SAS to analyze the well-structured user data accumulated in Google Analytics. The challenge is that the Google Analytics API and SAS hardly ever meet: Google Analytics mostly serves the web/Linux world, while SAS dwells in the
October 31, 2011
 

Inspired by Jian's polyglot programming practice, I have also begun brushing up the Python and C++ I learned in graduate school. Following is a Python response to one of Jian Dai's earlier programming challenges: counting the lines of code in a source tree:
[cce lang="python"]
import os

#count number of lines of
#a single file
def lineCount(fileName):
    countSingle = 0
    for line in open(fileName):
        countSingle += 1
    return countSingle

#count number of lines of
#a directory and its subdirectories
def dirCount(dir, extension):
    countTotal = 0
    for r, d, f in os.walk(dir):
        for files in f:
            if files.endswith(extension):
                fileName = os.path.join(r, files)
                countSingle = lineCount(fileName)
                countTotal += countSingle
    return countTotal

a = dirCount("C:/Program Files/CDISC Express/", ".sas")

print a
[/cce]
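The same function works unchanged on a Linux tree; a usage sketch (the path and extension below are hypothetical):

b = dirCount("/home/afan/python-code/", ".py")
print b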

I use Python 2.7.2, the latest Python 2.x release, mostly because of its broad module support, which suits my learning purposes. The book that helped me get a quick review of Python is Think Python: How to Think Like a Computer Scientist by Allen Downey.

Also, I have begun using CodeColorer on this blog to insert code.

October 5, 2011
 

I haven't updated in more than half a year. Some of my recent technical activity is over on my English blog:

1.  CDISC Express

CDISC Express is an open-source SDTM converter built on SAS and Excel (with XML behind the scenes). I wrote a series of introductory tutorials for it and entered a related contest (and won an iPad). Yihui also recently won a Kindle in a drawing, which, added to the Kindle I brought back from SAS Global Forum earlier this year, makes me marvel that knowledge (and probability) really is the first productive force for folks like us. Staring at the last-century Nokia 6300 in my hand, my determined gaze has settled on the iPhone 5. As the saying goes, doing what you love and finding a person or an organization to pay for it is one of life's pleasures.

Also, automatically loading clinical data into CDISC-compliant datasets is something quite a few vendors are working on now, and many pharmaceutical companies are developing it in-house. Friends who are interested might want to tinker with it too.

2. Big Data

Merely mentioning the concept is no longer fashionable; what matters is how to do it, or how to prepare for it. SAS will also be adding support for Hadoop, but the Hadoop framework is still Java-based, so picking C/C++, Java, or Python back up still has plenty of room to play (even R can get a handle on Hadoop). For a SAS programmer, it is time to refresh the toolbox.

So far this field is still the domain of IT people. On the one hand that shows traditional data analysts like us still need to firm up our technical skills (look at Hadoop's "hello world": every operation has to be decomposed into map() and reduce(), which not everyone is used to); on the other hand it shows this area is still in its early stages, because these frameworks ultimately exist to make data create value, and that cannot happen without data analysts.

3. Beyond SAS

Lately, outside of SAS, I have been trying to pick data mining back up. I had planned to take the SAS data mining certification in Las Vegas in April, but other things pushed it aside. What I got out of it in the end was at least a light pass over the material at the conceptual level. In Las Vegas this time I ran into Guanghui, a friend from the SAS club back at Peking University who now works in Canada. Guanghui said that in the gaps between conference sessions he casually went and took the SAS data mining certification (SAS Certified Predictive Modeler using SAS Enterprise Miner 5 or 6 Credential). Now that is what you call the ability to act.

By coincidence, in the first half of this year a friend introduced me to feature selection (in plain language, variable selection, which belongs to the early stages of data mining). I read through an R package for it, mostly just turning it over in my head, and nothing much came of it in the end. One extra benefit, though, was first-hand experience of R's advantages for learning, and I developed some personal affection for it: you look up a paper, and you can almost always find a corresponding R package to run and get a feel for the method. That works really well.

A bit more about R. I did genuinely try to learn R seriously. As a SAS programmer I figured I should start from input/output, so I studied the official R Data Import/Export manual. After going around in a circle I found that, for reading and writing data, SAS is still more convenient and effective for me; fiddling with this was not worth the effort, so I stopped.

Then I thought that one of R's advantages over SAS is its support for multiple data structures, such as array and list, so I started from there. Somehow, while learning R's data structures (perhaps I did not study deeply enough, or my skill is insufficient), I always felt something was not quite smooth, though I could not say exactly what. So I decided to pick my Python back up, since R and Python have things in common. Later I could say what it was: Python's data structures such as the list really are, for me, more elegant (put another way, perhaps I just have not yet read a document that introduces R's data structures "elegantly"). All right, for now my R remains at the stage of using it to try out algorithms.

Then came picking Python back up. One direct consequence of my two-plus years as a SAS programmer was that the bit of C++, Java, and Python I had learned before got knocked back down to "hello world" level; my hands had gone rusty. I have read a lot of code by Jian Dai (a SAS programmer in California), including Perl, C, JavaScript, and so on, and figured that being a SAS programmer does not mean throwing every other language away, so for convenience I started relearning Python.

Learning for learning's sake has its limits; using it at work is of course the fastest way to learn. I do not have that opportunity yet, so besides reading documentation I try to write small things in Python and treat them as if they were real tasks. Actually, besides direct use at work, another route is joining the open-source movement. Watching Yihui write R code so prolifically these past few years, a huge external stimulus has been his participation in open source, which lifted his whole motivation. An ordinary statistics PhD student does not need that much code; the drive comes from outside statistics.

Statistics. This year my study of basic statistics has not been as full as last year's. Dealing with statisticians every day, I want to learn their language properly. So far this year I have, learning on the job (from statisticians), worked through CI calculation and equivalence and non-inferiority tests. Next I plan to tinker with sample size and the like. Someone wrote an article called "Programmers Need To Learn Statistics Or I Will Kill Them All"; well, as a programmer who deals with statisticians every day, for the sake of my personal safety I had better learn some statistics.

4. Work

Not my own job, but job and internship opportunities; friends who are interested are welcome to contact me. Han Shuai, an undergraduate in the statistics department of Central South University, recently finished an internship at my company and wrote two very valuable summary articles on the SAS Chinese portal run by Sxlion; go take a look:

The story of a SAS newbie: the learning part (一个SAS菜鸟的故事 学习篇)

The story of a SAS newbie: the internship part (一个SAS菜鸟的故事 实习篇)

People are like this: only after you have been through it (I mean a first internship or the like) can you confidently call yourself a "newbie"; otherwise you really are the kind of newbie who "timidly asks" on forums. I recommend my own company, of course, because it is the one I know best. Honestly, in Beijing, at a pharmaceutical company, this really is a good internship opportunity for a SAS programmer. Last year I also recommended Tang Yaohua to intern with us; he is now a graduate student at the Chinese Academy of Sciences. Both of them came from Central South University. What can I say? For me and my colleagues, they raised Central South University's standing in our minds. In Shanghai, I suspect, many SAS programmers are also quite interested in Central South University.

Some college students write or call me about career or technology choices. This is not a matter of choosing between two offers in hand, buddy; diving into any one direction beats standing there hesitating.

5. SAS

I still have to mention SAS. I am now on SAS 9.2 (and then SAS 9.3 went and got gloriously released...). For me there are two immediate takeaways:

1) From SAS 9.2 on, the graphics are prettier, and I finally have some motivation to study SAS graphics. At work, after updating the company's plotting macros (based on 9.1.3) with colleagues in Pennsylvania last year, we now just call them directly; I have hardly written proc gplot and the like since, and I have noticed what little plotting craft I had withering fast...

2) In SAS 9.2, Base SAS officially supports user-defined functions (I can almost hear programmers from other camps sneering; let me timidly point out that SAS/IML has long supported user-defined functions, that SAS programmers have always written so-called function-like macros, and that macros support recursion too...). This will bring quite a bit of fun to our programming life; one example is that some old macros can be rewritten as functions (much easier to debug).

Before writing my own functions, I went through all the functions in Base SAS, to avoid reinventing the wheel later:

SAS_Func

I imported all the functions into Excel by category and then used a little VBA to put each function's description into a comment on its name. Going through them this way turned up quite a few interesting things; more on that another time.

October 4, 2011
 

In the last post I mentioned Hadoop, the open-source implementation of Google's MapReduce for parallelized processing of big data. Over this long National Holiday I read the original Google paper, MapReduce: Simplified Data Processing on Large Clusters by Jeffrey Dean and Sanjay Ghemawat, and learned that the terms "map" and "reduce" were basically borrowed from Lisp, an old functional language I had never even written a "hello world" in. For Python users the idea of map and reduce is also very straightforward, because the workhorse data structure in Python is the list, a sequence of values that you can imagine as the nodes (clusters, chunk servers, ...) of a distributed system.

MapReduce is a programming framework and is really language independent, so SAS users can also get the basic idea from their daily programming practice. Here is a simple illustration using a data step array (not the array in Proc FCMP or the matrix in IML). A data step array in SAS is fundamentally not a data structure but a convenient way of processing a group of variables, yet it can also be used to play with some list-like operations, as in Python and other languages with rich data structures (an editable version can be found here):

[Image: MapReduce illustration in a SAS data step; the line numbers below refer to this code]

Following the code above, the programming task is to capitalize the string "Hadoop" (Line 2), and the "master" method simply capitalizes the whole string in one go (Line 8): a single master machine processes all the data.

Then we introduce the idea of "big data": suppose the string is too huge for one master machine, so the "master" method fails. Now we distribute the task to thousands of low-cost machines (workers, slaves, chunk servers... in this case the one-dimensional array of size 6, see Line 11), and each machine does part of the job (each array element capitalizes only a single letter, see Lines 12-14). Such a distributing operation is called "map". In a MapReduce system a master machine is still needed to assign the maps and the reduce.

How about "reduce"? A "reduce" operation is also called a "fold": for example, in Line 17 all the separate values are combined into a single value, that is, the results from multiple worker machines are merged.
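The same toy task reads naturally in Python, where map and reduce are built in; a minimal sketch of the idea (my own illustration, not the SAS code from the image):

# pretend each letter of "Hadoop" lives on its own worker machine
data = list("Hadoop")

# "map": every worker capitalizes its own piece independently
mapped = map(str.upper, data)            # ['H', 'A', 'D', 'O', 'O', 'P']

# "reduce" (or fold): combine the partial results back into a single value
result = reduce(lambda acc, piece: acc + piece, mapped)

print result                             # HADOOP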

August 12, 2011
 
Yesterday I looked into Redis and ran a few experiments. Sina Weibo uses Redis to handle key-value data.
afan@ubuntu:~$ tar zxvf redis-2.2.12.tar.gz 
redis-2.2.12/
redis-2.2.12/.gitignore
redis-2.2.12/00-RELEASENOTES
redis-2.2.12/BUGS
redis-2.2.12/CONTRIBUTING
redis-2.2.12/COPYING
redis-2.2.12/Changelog
redis-2.2.12/INSTALL
redis-2.2.12/Makefile
redis-2.2.12/README
redis-2.2.12/TODO
redis-2.2.12/client-libraries/
redis-2.2.12/client-libraries/README
redis-2.2.12/deps/
redis-2.2.12/deps/hiredis/
redis-2.2.12/deps/hiredis/.gitignore
redis-2.2.12/deps/hiredis/COPYING
redis-2.2.12/deps/hiredis/Makefile
redis-2.2.12/deps/hiredis/README.md
redis-2.2.12/deps/hiredis/TODO
redis-2.2.12/deps/hiredis/adapters/
redis-2.2.12/deps/hiredis/adapters/ae.h
redis-2.2.12/deps/hiredis/adapters/libev.h
redis-2.2.12/deps/hiredis/adapters/libevent.h
redis-2.2.12/deps/hiredis/async.c
redis-2.2.12/deps/hiredis/async.h
redis-2.2.12/deps/hiredis/example-ae.c
redis-2.2.12/deps/hiredis/example-libev.c
redis-2.2.12/deps/hiredis/example-libevent.c
redis-2.2.12/deps/hiredis/example.c
redis-2.2.12/deps/hiredis/fmacros.h
redis-2.2.12/deps/hiredis/hiredis.c
redis-2.2.12/deps/hiredis/hiredis.h
redis-2.2.12/deps/hiredis/net.c
redis-2.2.12/deps/hiredis/net.h
redis-2.2.12/deps/hiredis/sds.c
redis-2.2.12/deps/hiredis/sds.h
redis-2.2.12/deps/hiredis/test.c
redis-2.2.12/deps/hiredis/util.h
redis-2.2.12/deps/linenoise/
redis-2.2.12/deps/linenoise/.gitignore
redis-2.2.12/deps/linenoise/Makefile
redis-2.2.12/deps/linenoise/README.markdown
redis-2.2.12/deps/linenoise/example.c
redis-2.2.12/deps/linenoise/linenoise.c
redis-2.2.12/deps/linenoise/linenoise.h
redis-2.2.12/design-documents/
redis-2.2.12/design-documents/REDIS-CLUSTER
redis-2.2.12/design-documents/REDIS-CLUSTER-2
redis-2.2.12/doc/
redis-2.2.12/doc/AppendCommand.html
redis-2.2.12/doc/AppendOnlyFileHowto.html
redis-2.2.12/doc/AuthCommand.html
redis-2.2.12/doc/Benchmarks.html
redis-2.2.12/doc/BgrewriteaofCommand.html
redis-2.2.12/doc/BgsaveCommand.html
redis-2.2.12/doc/BlpopCommand.html
redis-2.2.12/doc/BrpoplpushCommand.html
redis-2.2.12/doc/CommandReference.html
redis-2.2.12/doc/Comparisons.html
redis-2.2.12/doc/ConfigCommand.html
redis-2.2.12/doc/Configuration.html
redis-2.2.12/doc/ConnectionHandlingSidebar.html
redis-2.2.12/doc/ControlCommandsSidebar.html
redis-2.2.12/doc/Credits.html
redis-2.2.12/doc/DbsizeCommand.html
redis-2.2.12/doc/DelCommand.html
redis-2.2.12/doc/DesignPatterns.html
redis-2.2.12/doc/EventLibray.html
redis-2.2.12/doc/ExistsCommand.html
redis-2.2.12/doc/ExpireCommand.html
redis-2.2.12/doc/FAQ.html
redis-2.2.12/doc/Features.html
redis-2.2.12/doc/FlushallCommand.html
redis-2.2.12/doc/FlushdbCommand.html
redis-2.2.12/doc/FromSqlToDataStructures.html
redis-2.2.12/doc/GenericCommandsSidebar.html
redis-2.2.12/doc/GetCommand.html
redis-2.2.12/doc/GetbitCommand.html
redis-2.2.12/doc/GetsetCommand.html
redis-2.2.12/doc/HackingStrings.html
redis-2.2.12/doc/HashCommandsSidebar.html
redis-2.2.12/doc/Hashes.html
redis-2.2.12/doc/HdelCommand.html
redis-2.2.12/doc/HexistsCommand.html
redis-2.2.12/doc/HgetCommand.html
redis-2.2.12/doc/HgetallCommand.html
redis-2.2.12/doc/HincrbyCommand.html
redis-2.2.12/doc/HlenCommand.html
redis-2.2.12/doc/HmgetCommand.html
redis-2.2.12/doc/HmsetCommand.html
redis-2.2.12/doc/HsetCommand.html
redis-2.2.12/doc/HsetnxCommand.html
redis-2.2.12/doc/IncrCommand.html
redis-2.2.12/doc/InfoCommand.html
redis-2.2.12/doc/IntroductionToRedisDataTypes.html
redis-2.2.12/doc/KeysCommand.html
redis-2.2.12/doc/LastsaveCommand.html
redis-2.2.12/doc/LindexCommand.html
redis-2.2.12/doc/ListCommandsSidebar.html
redis-2.2.12/doc/Lists.html
redis-2.2.12/doc/LlenCommand.html
redis-2.2.12/doc/LpopCommand.html
redis-2.2.12/doc/LrangeCommand.html
redis-2.2.12/doc/LremCommand.html
redis-2.2.12/doc/LsetCommand.html
redis-2.2.12/doc/LtrimCommand.html
redis-2.2.12/doc/MgetCommand.html
redis-2.2.12/doc/MonitorCommand.html
redis-2.2.12/doc/MoveCommand.html
redis-2.2.12/doc/MsetCommand.html
redis-2.2.12/doc/MultiExecCommand.html
redis-2.2.12/doc/NonexistentCommands.html
redis-2.2.12/doc/ObjectHashMappers.html
redis-2.2.12/doc/Pipelining.html
redis-2.2.12/doc/ProgrammingExamples.html
redis-2.2.12/doc/ProtocolSpecification.html
redis-2.2.12/doc/PublishSubscribe.html
redis-2.2.12/doc/QuickStart.html
redis-2.2.12/doc/QuitCommand.html
redis-2.2.12/doc/README.html
redis-2.2.12/doc/RandomkeyCommand.html
redis-2.2.12/doc/Redis0100ChangeLog.html
redis-2.2.12/doc/Redis0900ChangeLog.html
redis-2.2.12/doc/RedisBigData.html
redis-2.2.12/doc/RedisCLI.html
redis-2.2.12/doc/RedisEventLibrary.html
redis-2.2.12/doc/RedisGuides.html
redis-2.2.12/doc/RedisInternals.html
redis-2.2.12/doc/RedisPipelining.html
redis-2.2.12/doc/RedisStatus.html
redis-2.2.12/doc/Redis_1_2_0_Changelog.html
redis-2.2.12/doc/Redis_2_0_0_Changelog.html
redis-2.2.12/doc/Redis_2_0_Whats_new.html
redis-2.2.12/doc/RenameCommand.html
redis-2.2.12/doc/RenamenxCommand.html
redis-2.2.12/doc/ReplicationHowto.html
redis-2.2.12/doc/ReplyTypes.html
redis-2.2.12/doc/RoadMap.html
redis-2.2.12/doc/RpoplpushCommand.html
redis-2.2.12/doc/RpushCommand.html
redis-2.2.12/doc/SaddCommand.html
redis-2.2.12/doc/SaveCommand.html
redis-2.2.12/doc/ScardCommand.html
redis-2.2.12/doc/SdiffCommand.html
redis-2.2.12/doc/SdiffstoreCommand.html
redis-2.2.12/doc/SelectCommand.html
redis-2.2.12/doc/SetCommand.html
redis-2.2.12/doc/SetCommandsSidebar.html
redis-2.2.12/doc/SetbitCommand.html
redis-2.2.12/doc/SetexCommand.html
redis-2.2.12/doc/SetnxCommand.html
redis-2.2.12/doc/SetrangeCommand.html
redis-2.2.12/doc/Sets.html
redis-2.2.12/doc/ShutdownCommand.html
redis-2.2.12/doc/SideBar.html
redis-2.2.12/doc/SinterCommand.html
redis-2.2.12/doc/SinterstoreCommand.html
redis-2.2.12/doc/SismemberCommand.html
redis-2.2.12/doc/SlaveofCommand.html
redis-2.2.12/doc/SmembersCommand.html
redis-2.2.12/doc/SmoveCommand.html
redis-2.2.12/doc/SortCommand.html
redis-2.2.12/doc/SortedSetCommandsSidebar.html
redis-2.2.12/doc/SortedSets.html
redis-2.2.12/doc/Speed.html
redis-2.2.12/doc/SponsorshipHistory.html
redis-2.2.12/doc/SpopCommand.html
redis-2.2.12/doc/SrandmemberCommand.html
redis-2.2.12/doc/SremCommand.html
redis-2.2.12/doc/StringCommandsSidebar.html
redis-2.2.12/doc/Strings.html
redis-2.2.12/doc/StrlenCommand.html
redis-2.2.12/doc/SubstrCommand.html
redis-2.2.12/doc/SunionCommand.html
redis-2.2.12/doc/SunionstoreCommand.html
redis-2.2.12/doc/SupportedLanguages.html
redis-2.2.12/doc/SupportedPlatforms.html
redis-2.2.12/doc/TemplateCommand.html
redis-2.2.12/doc/TtlCommand.html
redis-2.2.12/doc/TwitterAlikeExample.html
redis-2.2.12/doc/TypeCommand.html
redis-2.2.12/doc/UnstableSource.html
redis-2.2.12/doc/VirtualMemorySpecification.html
redis-2.2.12/doc/VirtualMemoryUserGuide.html
redis-2.2.12/doc/ZaddCommand.html
redis-2.2.12/doc/ZcardCommand.html
redis-2.2.12/doc/ZincrbyCommand.html
redis-2.2.12/doc/ZrangeCommand.html
redis-2.2.12/doc/ZrangebyscoreCommand.html
redis-2.2.12/doc/ZrankCommand.html
redis-2.2.12/doc/ZremCommand.html
redis-2.2.12/doc/ZremrangebyrankCommand.html
redis-2.2.12/doc/ZremrangebyscoreCommand.html
redis-2.2.12/doc/ZscoreCommand.html
redis-2.2.12/doc/ZunionCommand.html
redis-2.2.12/doc/ZunionstoreCommand.html
redis-2.2.12/doc/index.html
redis-2.2.12/doc/redis.png
redis-2.2.12/doc/style.css
redis-2.2.12/redis.conf
redis-2.2.12/src/
redis-2.2.12/src/Makefile
redis-2.2.12/src/adlist.c
redis-2.2.12/src/adlist.h
redis-2.2.12/src/ae.c
redis-2.2.12/src/ae.h
redis-2.2.12/src/ae_epoll.c
redis-2.2.12/src/ae_kqueue.c
redis-2.2.12/src/ae_select.c
redis-2.2.12/src/anet.c
redis-2.2.12/src/anet.h
redis-2.2.12/src/aof.c
redis-2.2.12/src/config.c
redis-2.2.12/src/config.h
redis-2.2.12/src/db.c
redis-2.2.12/src/debug.c
redis-2.2.12/src/dict.c
redis-2.2.12/src/dict.h
redis-2.2.12/src/fmacros.h
redis-2.2.12/src/help.h
redis-2.2.12/src/intset.c
redis-2.2.12/src/intset.h
redis-2.2.12/src/lzf.h
redis-2.2.12/src/lzfP.h
redis-2.2.12/src/lzf_c.c
redis-2.2.12/src/lzf_d.c
redis-2.2.12/src/mkreleasehdr.sh
redis-2.2.12/src/multi.c
redis-2.2.12/src/networking.c
redis-2.2.12/src/object.c
redis-2.2.12/src/pqsort.c
redis-2.2.12/src/pqsort.h
redis-2.2.12/src/pubsub.c
redis-2.2.12/src/rdb.c
redis-2.2.12/src/redis-benchmark.c
redis-2.2.12/src/redis-check-aof.c
redis-2.2.12/src/redis-check-dump.c
redis-2.2.12/src/redis-cli.c
redis-2.2.12/src/redis.c
redis-2.2.12/src/redis.h
redis-2.2.12/src/release.c
redis-2.2.12/src/replication.c
redis-2.2.12/src/sds.c
redis-2.2.12/src/sds.h
redis-2.2.12/src/sha1.c
redis-2.2.12/src/sha1.h
redis-2.2.12/src/slowlog.c
redis-2.2.12/src/slowlog.h
redis-2.2.12/src/solarisfixes.h
redis-2.2.12/src/sort.c
redis-2.2.12/src/syncio.c
redis-2.2.12/src/t_hash.c
redis-2.2.12/src/t_list.c
redis-2.2.12/src/t_set.c
redis-2.2.12/src/t_string.c
redis-2.2.12/src/t_zset.c
redis-2.2.12/src/testhelp.h
redis-2.2.12/src/util.c
redis-2.2.12/src/valgrind.sup
redis-2.2.12/src/version.h
redis-2.2.12/src/vm.c
redis-2.2.12/src/ziplist.c
redis-2.2.12/src/ziplist.h
redis-2.2.12/src/zipmap.c
redis-2.2.12/src/zipmap.h
redis-2.2.12/src/zmalloc.c
redis-2.2.12/src/zmalloc.h
redis-2.2.12/tests/
redis-2.2.12/tests/assets/
redis-2.2.12/tests/assets/default.conf
redis-2.2.12/tests/integration/
redis-2.2.12/tests/integration/aof.tcl
redis-2.2.12/tests/integration/redis-cli.tcl
redis-2.2.12/tests/integration/replication.tcl
redis-2.2.12/tests/support/
redis-2.2.12/tests/support/redis.tcl
redis-2.2.12/tests/support/server.tcl
redis-2.2.12/tests/support/test.tcl
redis-2.2.12/tests/support/tmpfile.tcl
redis-2.2.12/tests/support/util.tcl
redis-2.2.12/tests/test_helper.tcl
redis-2.2.12/tests/tmp/
redis-2.2.12/tests/tmp/.gitignore
redis-2.2.12/tests/unit/
redis-2.2.12/tests/unit/auth.tcl
redis-2.2.12/tests/unit/basic.tcl
redis-2.2.12/tests/unit/cas.tcl
redis-2.2.12/tests/unit/expire.tcl
redis-2.2.12/tests/unit/other.tcl
redis-2.2.12/tests/unit/printver.tcl
redis-2.2.12/tests/unit/protocol.tcl
redis-2.2.12/tests/unit/pubsub.tcl
redis-2.2.12/tests/unit/quit.tcl
redis-2.2.12/tests/unit/slowlog.tcl
redis-2.2.12/tests/unit/sort.tcl
redis-2.2.12/tests/unit/type/
redis-2.2.12/tests/unit/type/hash.tcl
redis-2.2.12/tests/unit/type/list.tcl
redis-2.2.12/tests/unit/type/set.tcl
redis-2.2.12/tests/unit/type/zset.tcl
redis-2.2.12/utils/
redis-2.2.12/utils/build-static-symbols.tcl
redis-2.2.12/utils/generate-command-help.rb
redis-2.2.12/utils/mktarball.sh
redis-2.2.12/utils/redis-copy.rb
redis-2.2.12/utils/redis-sha1.rb
redis-2.2.12/utils/redis_init_script
afan@ubuntu:~$ cd redis-2.2.12/
afan@ubuntu:~/redis-2.2.12$ ls
00-RELEASENOTES   CONTRIBUTING      doc       redis.conf  utils
BUGS              COPYING           INSTALL   src
Changelog         deps              Makefile  tests
client-libraries  design-documents  README    TODO

Install Redis
afan@ubuntu:~/redis-2.2.12$ make
cd src && make all
make[1]: 正在进入目录 `/home/afan/redis-2.2.12/src'
MAKE hiredis
cd ../deps/hiredis && make static ARCH=""
make[2]: 正在进入目录 `/home/afan/redis-2.2.12/deps/hiredis'
cc -c -std=c99 -pedantic -O3 -fPIC -Wall -W -Wwrite-strings    -g -ggdb  net.c
cc -c -std=c99 -pedantic -O3 -fPIC -Wall -W -Wwrite-strings    -g -ggdb  hiredis.c
cc -c -std=c99 -pedantic -O3 -fPIC -Wall -W -Wwrite-strings    -g -ggdb  sds.c
cc -c -std=c99 -pedantic -O3 -fPIC -Wall -W -Wwrite-strings    -g -ggdb  async.c
ar rcs libhiredis.a net.o hiredis.o sds.o async.o
make[2]:正在离开目录 `/home/afan/redis-2.2.12/deps/hiredis'
MAKE linenoise
cd ../deps/linenoise && make ARCH=""
make[2]: 正在进入目录 `/home/afan/redis-2.2.12/deps/linenoise'
cc  -c -Wall -W -Os -g linenoise.c
cc  -c -Wall -W -Os -g example.c
cc  -Wall -W -Os -g -o linenoise_example linenoise.o example.o
make[2]:正在离开目录 `/home/afan/redis-2.2.12/deps/linenoise'
    CC ae.o
    CC anet.o
    CC redis-benchmark.o
    CC sds.o
    CC adlist.o
    CC zmalloc.o
MAKE hiredis
cd ../deps/hiredis && make static ARCH=""
make[2]: 正在进入目录 `/home/afan/redis-2.2.12/deps/hiredis'
make[2]: 没有什么可以做的为 `static'。
make[2]:正在离开目录 `/home/afan/redis-2.2.12/deps/hiredis'
    LINK redis-benchmark
    CC redis-cli.o
    CC release.o
    LINK redis-cli
    CC redis-check-dump.o
    CC lzf_c.o
    CC lzf_d.o
    LINK redis-check-dump
    CC redis-check-aof.o
    LINK redis-check-aof
    CC dict.o
    CC redis.o
    CC pqsort.o
    CC zipmap.o
    CC sha1.o
    CC ziplist.o
    CC networking.o
    CC util.o
    CC object.o
    CC db.o
    CC replication.o
    CC rdb.o
    CC t_string.o
    CC t_list.o
    CC t_set.o
    CC t_zset.o
    CC t_hash.o
    CC config.o
    CC aof.o
    CC vm.o
    CC pubsub.o
    CC multi.o
    CC debug.o
    CC sort.o
    CC intset.o
    CC syncio.o
    CC slowlog.o
    LINK redis-server

Hint: To run 'make test' is a good idea ;)

make[1]:正在离开目录 `/home/afan/redis-2.2.12/src'
afan@ubuntu:~/redis-2.2.12$ 

Start the server
afan@ubuntu:~/redis-2.2.12/src$ ./redis-server
[2485] 12 Aug 09:41:22 # Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'
[2485] 12 Aug 09:41:22 * Server started, Redis version 2.2.12
[2485] 12 Aug 09:41:22 # WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.
[2485] 12 Aug 09:41:22 * The server is now ready to accept connections on port 6379
[2485] 12 Aug 09:41:22 - 0 clients connected (0 slaves), 539544 bytes in use
[2485] 12 Aug 09:41:27 - 0 clients connected (0 slaves), 539544 bytes in use

Test the client
afan@ubuntu:~/redis-2.2.12/src$ ./redis-cli
redis 127.0.0.1:6379> set name afan
OK
redis 127.0.0.1:6379> get name
"afan"




Call Redis from the Python client
afan@ubuntu:~$ python
Python 2.7.1+ (r271:86832, Apr 11 2011, 18:05:24) 
[GCC 4.5.2] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import redis
>>> r_server = redis.Redis("localhost")
>>> r_server.set("name", "afan")
True
>>> r_server.get("name")
'afan'
>>> 


Simulating the Google+ Circles feature
Create two circles, family and soccer
redis 127.0.0.1:6379> sadd circle:jdoe:family users:anna
(integer) 1
redis 127.0.0.1:6379> sadd circle:jdoe:family users:richard
(integer) 1
redis 127.0.0.1:6379> sadd circle:jdoe:family users:mike
(integer) 1
redis 127.0.0.1:6379> sadd circle:jdoe:soccer users:mike
(integer) 1
redis 127.0.0.1:6379> sadd circle:jdoe:soccer users:adam
(integer) 1
redis 127.0.0.1:6379> sadd circle:jdoe:soccer users:toby
(integer) 1
redis 127.0.0.1:6379> sadd circle:jdoe:soccer users:apollo
(integer) 1
Compute the members
redis 127.0.0.1:6379> smembers circle:jdoe:family
1) "users:richard"
2) "users:mike"
3) "users:anna"

redis 127.0.0.1:6379> sinter circle:jdoe:family circle:jdoe:soccer
1) "users:mike"
redis 127.0.0.1:6379> sunion circle:jdoe:family circle:jdoe:soccer
1) "users:anna"
2) "users:mike"
3) "users:apollo"
4) "users:adam"
5) "users:richard"
6) "users:toby"

This makes it very easy to simulate the Google+ Circles feature; the same operations can also be driven from the Python client, as sketched below.
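A minimal sketch with redis-py, reusing the keys from the redis-cli session above (assuming the same client shown earlier in this post):

import redis

r = redis.Redis("localhost")

# rebuild the two circles (Redis sets), mirroring the redis-cli commands above
for member in ("users:anna", "users:richard", "users:mike"):
    r.sadd("circle:jdoe:family", member)
for member in ("users:mike", "users:adam", "users:toby", "users:apollo"):
    r.sadd("circle:jdoe:soccer", member)

print r.smembers("circle:jdoe:family")                       # everyone in the family circle
print r.sinter("circle:jdoe:family", "circle:jdoe:soccer")   # in both circles
print r.sunion("circle:jdoe:family", "circle:jdoe:soccer")   # in either circle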


 Posted at 1:29 PM
August 10, 2011
 
Ubuntu
Test code:
#!/usr/bin/env python
import sys
import datetime
# area of space to investigate
x1, x2, y1, y2 = -2.13, 0.77, -1.3, 1.3

# Original code, prints progress (because it is slow)
# Uses complex datatype

def calculate_z_serial_purepython(q, maxiter, z):
    """Pure python with complex datatype, iterating over list of q and z"""
    output = [0] * len(q)
    for i in range(len(q)):
        if i % 1000 == 0:
            # print out some progress info since it is so slow...
            print "%0.2f%% complete" % (1.0/len(q) * i * 100)
        for iteration in range(maxiter):
            z[i] = z[i]*z[i] + q[i]
            if abs(z[i]) > 2.0:
                output[i] = iteration
                break
    return output

def calc_pure_python(show_output):
    # make a list of x and y values which will represent q
    # xx and yy are the co-ordinates, for the default configuration they'll look like:
    # if we have a 1000x1000 plot
    # xx = [-2.13, -2.1242, -2.1184000000000003, ..., 0.7526000000000064, 0.7584000000000064, 0.7642000000000064]
    # yy = [1.3, 1.2948, 1.2895999999999999, ..., -1.2844000000000058, -1.2896000000000059, -1.294800000000006]
    x_step = (float(x2 - x1) / float(w)) * 2
    y_step = (float(y1 - y2) / float(h)) * 2
    x=[]
    y=[]
    ycoord = y2
    while ycoord > y1:
        y.append(ycoord)
        ycoord += y_step
    xcoord = x1
    while xcoord < x2:
        x.append(xcoord)
        xcoord += x_step
    q = []
    for ycoord in y:
        for xcoord in x:
            q.append(complex(xcoord,ycoord))

    z = [0+0j] * len(q)
    print "Total elements:", len(z)
    start_time = datetime.datetime.now()
    output = calculate_z_serial_purepython(q, maxiter, z)
    end_time = datetime.datetime.now()
    secs = end_time - start_time
    print "Main took", secs

    validation_sum = sum(output)
    print "Total sum of elements (for validation):", validation_sum

    if show_output:
        try:
            import Image
            import numpy as nm
            output = nm.array(output)
            output = (output + (256*output) + (256**2)*output) * 8
            im = Image.new("RGB", (w/2, h/2))
            im.fromstring(output.tostring(), "raw", "RGBX", 0, -1)
            im.show()
        except ImportError as err:
            # Bail gracefully if we're using PyPy
            print "Couldn't import Image or numpy:", str(err)

if __name__ == "__main__":
    # get width, height and max iterations from cmd line
    # 'python mandelbrot_pypy.py 100 300'
    w = int(sys.argv[1]) # e.g. 100
    h = int(sys.argv[1]) # e.g. 100
    maxiter = int(sys.argv[2]) # e.g. 300
    
    # we can show_output for Python, not for PyPy
    calc_pure_python(True)

Profiling with cProfile:

afan@ubuntu:~/python-code$ python -m cProfile -o rep.prof pure_python.py 1000 1000
Total elements: 250000
0.00% complete
0.40% complete
0.80% complete
1.20% complete
1.60% complete
2.00% complete
2.40% complete
2.80% complete
3.20% complete
3.60% complete
4.00% complete
4.40% complete
4.80% complete
5.20% complete
5.60% complete
6.00% complete
6.40% complete
6.80% complete
7.20% complete
7.60% complete
8.00% complete
8.40% complete
8.80% complete
9.20% complete
9.60% complete
10.00% complete
10.40% complete
10.80% complete
11.20% complete
11.60% complete
12.00% complete
12.40% complete
12.80% complete
13.20% complete
13.60% complete
14.00% complete
14.40% complete
14.80% complete
15.20% complete
15.60% complete
16.00% complete
16.40% complete
16.80% complete
17.20% complete
17.60% complete
18.00% complete
18.40% complete
18.80% complete
19.20% complete
19.60% complete
20.00% complete
20.40% complete
20.80% complete
21.20% complete
21.60% complete
22.00% complete
22.40% complete
22.80% complete
23.20% complete
23.60% complete
24.00% complete
24.40% complete
24.80% complete
25.20% complete
25.60% complete
26.00% complete
26.40% complete
26.80% complete
27.20% complete
27.60% complete
28.00% complete
28.40% complete
28.80% complete
29.20% complete
29.60% complete
30.00% complete
30.40% complete
30.80% complete
31.20% complete
31.60% complete
32.00% complete
32.40% complete
32.80% complete
33.20% complete
33.60% complete
34.00% complete
34.40% complete
34.80% complete
35.20% complete
35.60% complete
36.00% complete
36.40% complete
36.80% complete
37.20% complete
37.60% complete
38.00% complete
38.40% complete
38.80% complete
39.20% complete
39.60% complete
40.00% complete
40.40% complete
40.80% complete
41.20% complete
41.60% complete
42.00% complete
42.40% complete
42.80% complete
43.20% complete
43.60% complete
44.00% complete
44.40% complete
44.80% complete
45.20% complete
45.60% complete
46.00% complete
46.40% complete
46.80% complete
47.20% complete
47.60% complete
48.00% complete
48.40% complete
48.80% complete
49.20% complete
49.60% complete
50.00% complete
50.40% complete
50.80% complete
51.20% complete
51.60% complete
52.00% complete
52.40% complete
52.80% complete
53.20% complete
53.60% complete
54.00% complete
54.40% complete
54.80% complete
55.20% complete
55.60% complete
56.00% complete
56.40% complete
56.80% complete
57.20% complete
57.60% complete
58.00% complete
58.40% complete
58.80% complete
59.20% complete
59.60% complete
60.00% complete
60.40% complete
60.80% complete
61.20% complete
61.60% complete
62.00% complete
62.40% complete
62.80% complete
63.20% complete
63.60% complete
64.00% complete
64.40% complete
64.80% complete
65.20% complete
65.60% complete
66.00% complete
66.40% complete
66.80% complete
67.20% complete
67.60% complete
68.00% complete
68.40% complete
68.80% complete
69.20% complete
69.60% complete
70.00% complete
70.40% complete
70.80% complete
71.20% complete
71.60% complete
72.00% complete
72.40% complete
72.80% complete
73.20% complete
73.60% complete
74.00% complete
74.40% complete
74.80% complete
75.20% complete
75.60% complete
76.00% complete
76.40% complete
76.80% complete
77.20% complete
77.60% complete
78.00% complete
78.40% complete
78.80% complete
79.20% complete
79.60% complete
80.00% complete
80.40% complete
80.80% complete
81.20% complete
81.60% complete
82.00% complete
82.40% complete
82.80% complete
83.20% complete
83.60% complete
84.00% complete
84.40% complete
84.80% complete
85.20% complete
85.60% complete
86.00% complete
86.40% complete
86.80% complete
87.20% complete
87.60% complete
88.00% complete
88.40% complete
88.80% complete
89.20% complete
89.60% complete
90.00% complete
90.40% complete
90.80% complete
91.20% complete
91.60% complete
92.00% complete
92.40% complete
92.80% complete
93.20% complete
93.60% complete
94.00% complete
94.40% complete
94.80% complete
95.20% complete
95.60% complete
96.00% complete
96.40% complete
96.80% complete
97.20% complete
97.60% complete
98.00% complete
98.40% complete
98.80% complete
99.20% complete
99.60% complete
Main took 0:02:34.268042
Total sum of elements (for validation): 1147734
afan@ubuntu:~/python-code$ python
Python 2.7.1+ (r271:86832, Apr 11 2011, 18:05:24) 
[GCC 4.5.2] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import pstats
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ImportError: No module named pstats

Installing the Python pstats module on Ubuntu
afan@ubuntu:~$ sudo apt-get install python-profiler
[sudo] password for afan: 
正在读取软件包列表... 完成
正在分析软件包的依赖关系树       
正在读取状态信息... 完成       
建议安装的软件包:
  python-doc
下列【新】软件包将被安装:
  python-profiler
升级了 0 个软件包,新安装了 1 个软件包,要卸载 0 个软件包,有 122 个软件包未被升级。
需要下载 41.2 kB 的软件包。
解压缩后会消耗掉 233 kB 的额外空间。
获取:1 http://us.archive.ubuntu.com/ubuntu/ natty/multiverse python-profiler all 2.6.6-0ubuntu1 [41.2 kB]
下载 41.2 kB,耗时 2秒 (19.1 kB/s)         
选中了曾被取消选择的软件包 python-profiler。
(正在读取数据库 ... 系统当前共安装有 209452 个文件和目录。)
正在解压缩 python-profiler (从 .../python-profiler_2.6.6-0ubuntu1_all.deb) ...
正在设置 python-profiler (2.6.6-0ubuntu1) ...
>>> import pstats
>>> p = pstats.Stats('rep.prof')
>>> p.sort_stats('cumulative').print_stats(10)
Thu Jul 28 12:00:35 2011    rep.prof

         51927078 function calls (51926888 primitive calls) in 157.339 CPU seconds

   Ordered by: cumulative time
   List reduced from 535 to 10 due to restriction <10>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.037    0.037  157.343  157.343 pure_python.py:1(<module>)
        1    0.459    0.459  157.306  157.306 pure_python.py:23(calc_pure_python)
        1   94.657   94.657  154.268  154.268 pure_python.py:9(calculate_z_serial_purepython)
 51414667   56.833    0.000   56.833    0.000 {abs}
   250073    2.778    0.000    2.778    0.000 {range}
        1    0.096    0.096    1.986    1.986 /usr/local/lib/python2.7/dist-packages/numpy/__init__.py:106(<module>)
        1    0.026    0.026    1.662    1.662 /usr/local/lib/python2.7/dist-packages/numpy/add_newdocs.py:9(<module>)
        1    0.075    0.075    1.620    1.620 /usr/local/lib/python2.7/dist-packages/numpy/lib/__init__.py:1(<module>)
        1    0.052    0.052    1.029    1.029 /usr/local/lib/python2.7/dist-packages/numpy/lib/type_check.py:3(<module>)
        1    0.366    0.366    0.977    0.977 /usr/local/lib/python2.7/dist-packages/numpy/core/__init__.py:2(<module>)

<pstats.Stats instance at 0xb773ef2c>
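The cumulative listing above already points at calculate_z_serial_purepython and the roughly 51 million abs() calls; sorting by internal time instead makes the hot spots stand out (same pstats object, a small follow-up sketch):

>>> p.sort_stats('time').print_stats(5)    # top 5 functions by time spent inside them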

Inspect the code's performance with RunSnakeRun
afan@ubuntu:~/python-code$ runsnake rep.prof


 Posted at 3:56 PM
July 7, 2011
 
import sys

if len(sys.argv) != 2 :
    print 'usage: python quad_file.py file.txt'
    sys.exit( 1 )

def fn( x ) :
    """ return fn(x) = 2x**2 - 4x + 2 """
    return  x * ( 2*x - 4 ) + 2

for x in file( sys.argv[1] ) :
    try :
        # convert the string to a float
        fx = float(x)
        print fx, fn( fx )
    except ValueError :
        print x, 'cannot be converted'

afan@ubuntu:~/python-code$ python quad_file.py data.txt 
1.0 0.0
2.0 2.0
4.0 18.0
8.0 98.0
16.0 450.0
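For reference, an input file consistent with the run above would look like this (reconstructed from the output, so the exact contents are an assumption):

data.txt:
1
2
4
8
16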
#!/usr/bin/env python
import sys

if len(sys.argv) <= 1 :
    print "usage:", sys.argv[0], "files ... "
    sys.exit(1)

# collate the data into the entries dictionary
entries = {} # empty dictionary
for fn in sys.argv[1:] : 
    f = file( fn )
    for l in f :
        (name,weight) = l.split()
        if name in entries :
            l = entries[ name ]
            l.append( float( weight ) )
        else :
            entries[name] = [ float( weight ) ]
    f.close()

names = entries.keys()
names.sort()

for n in names :
    print "%-10s:" % n,
    for w in entries[ n ] :
        print " %7.2f" % w, 
    print
afan@ubuntu:~/python-code$ python collate.py rat-weight 
r1        :    50.00    61.00    71.00
r2        :    55.00    64.00    76.00
r3        :    70.00    77.00    85.00
r4        :    65.00    75.00    83.00
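Similarly, a rat-weight file consistent with the collated output above could be (again reconstructed from the output, so an assumption):

r1 50
r1 61
r1 71
r2 55
r2 64
r2 76
r3 70
r3 77
r3 85
r4 65
r4 75
r4 83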
 Posted at 7:46 PM
May 11, 2011
 
CREATE DATABASE `menu`;
USE menu;
DROP TABLE IF EXISTS `fish`;
SET @saved_cs_client = @@character_set_client;
SET character_set_client = utf8;
CREATE TABLE `fish` (
`ID` int(11) NOT NULL auto_increment,
`NAME` varchar(30) NOT NULL default '',
`PRICE` decimal(5,2) NOT NULL default '0.00',
PRIMARY KEY (`ID`)
) ENGINE=MyISAM AUTO_INCREMENT=27 DEFAULT CHARSET=latin1;
SET character_set_client = @saved_cs_client;
LOCK TABLES `fish` WRITE;
INSERT INTO `fish` VALUES (1,'catfish','8.50'),(2,'catfish','8.50'),(3,'tuna','8.00'),(4,'catfish','5.00'),(5,'bass','6.75'),(6,'haddock','6.50'),(7,'salmon','9.50'),(8,'trout','6.00'),(9,'tuna','7.50'),(10,'yellowfin tuna','12.00'),(11,'yellowfin tuna','13.00'),(12,'tuna','7.50');
UNLOCK TABLES;


mysql> show tables;
+----------------+
| Tables_in_menu |
+----------------+
| fish           |
+----------------+
1 row in set (0.00 sec)

mysql> select * from fish;
+----+----------------+-------+
| ID | NAME           | PRICE |
+----+----------------+-------+
|  1 | catfish        |  8.50 |
|  2 | catfish        |  8.50 |
|  3 | tuna           |  8.00 |
|  4 | catfish        |  5.00 |
|  5 | bass           |  6.75 |
|  6 | haddock        |  6.50 |
|  7 | salmon         |  9.50 |
|  8 | trout          |  6.00 |
|  9 | tuna           |  7.50 |
| 10 | yellowfin tuna | 12.00 |
| 11 | yellowfin tuna | 13.00 |
| 12 | tuna           |  7.50 |
+----+----------------+-------+
12 rows in set (0.03 sec)

mysql> describe fish;
+-------+--------------+------+-----+---------+----------------+
| Field | Type         | Null | Key | Default | Extra          |
+-------+--------------+------+-----+---------+----------------+
| ID    | int(11)      | NO   | PRI | NULL    | auto_increment |
| NAME  | varchar(30)  | NO   |     |         |                |
| PRICE | decimal(5,2) | NO   |     | 0.00    |                |
+-------+--------------+------+-----+---------+----------------+
3 rows in set (0.01 sec)

mysql> select * from fish where id = 5;
+----+------+-------+
| ID | NAME | PRICE |
+----+------+-------+
|  5 | bass |  6.75 |
+----+------+-------+
1 row in set (0.00 sec)

query.py

import MySQLdb

# connect to the local "menu" database (password elided)
mydb = MySQLdb.connect(host = 'localhost', user = 'root', passwd = '******', db = 'menu')
cur = mydb.cursor()
cur.execute('select * from fish')
results = cur.fetchall()   # fetch every row before closing the connection
mydb.close()
for record in results:
    print record[0] , "-->", record[1] , "@", record[2], "each"

afan@ubuntu:~/python-code$ python query.py
1 --> catfish @ 8.50 each
2 --> catfish @ 8.50 each
3 --> tuna @ 8.00 each
4 --> catfish @ 5.00 each
5 --> bass @ 6.75 each
6 --> haddock @ 6.50 each
7 --> salmon @ 9.50 each
8 --> trout @ 6.00 each
9 --> tuna @ 7.50 each
10 --> yellowfin tuna @ 12.00 each
11 --> yellowfin tuna @ 13.00 each
12 --> tuna @ 7.50 each
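The interactive `select * from fish where id = 5` query shown earlier can be run from Python as well; a minimal sketch with MySQLdb using a parameterized query, so the driver handles the quoting (connection details copied from query.py, password still elided):

import MySQLdb

mydb = MySQLdb.connect(host = 'localhost', user = 'root', passwd = '******', db = 'menu')
cur = mydb.cursor()
# %s is MySQLdb's placeholder; the value is escaped by the driver, not by string formatting
cur.execute('select * from fish where id = %s', (5,))
print cur.fetchone()       # the row for bass
mydb.close()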
 Posted at 5:39 PM